From 5ab4d94d9499831968804502c43ba7c3e64c0199 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 18 Aug 2024 21:53:40 -0700 Subject: [PATCH] Logging Level Update (#2165) --- backend/danswer/auth/users.py | 8 ++-- .../danswer/background/celery/celery_app.py | 2 +- .../danswer/background/celery/celery_utils.py | 2 +- .../danswer/background/connector_deletion.py | 4 +- .../background/indexing/run_indexing.py | 10 ++-- backend/danswer/background/indexing/tracer.py | 8 ++-- backend/danswer/background/update.py | 4 +- backend/danswer/configs/constants.py | 4 ++ backend/danswer/connectors/blob/connector.py | 4 +- .../rate_limit_wrapper.py | 2 +- .../danswer/connectors/github/connector.py | 2 +- .../connectors/gmail/connector_auth.py | 2 +- backend/danswer/connectors/gong/connector.py | 4 +- .../connectors/google_drive/connector.py | 2 +- .../connectors/google_drive/connector_auth.py | 2 +- backend/danswer/danswerbot/slack/constants.py | 1 - .../slack/handlers/handle_buttons.py | 3 +- .../slack/handlers/handle_message.py | 23 ++-------- .../slack/handlers/handle_regular_answer.py | 9 ++-- .../slack/handlers/handle_standard_answers.py | 5 +- backend/danswer/danswerbot/slack/listener.py | 34 +++++++------- backend/danswer/danswerbot/slack/utils.py | 16 ------- backend/danswer/db/credentials.py | 4 +- backend/danswer/db/document.py | 4 +- .../document_index/vespa/chunk_retrieval.py | 2 +- backend/danswer/document_index/vespa/index.py | 2 +- .../file_processing/extract_file_text.py | 2 +- backend/danswer/indexing/indexing_pipeline.py | 4 +- backend/danswer/llm/answering/answer.py | 2 +- .../danswer/llm/answering/prune_and_merge.py | 2 +- .../stream_processing/quotes_processing.py | 6 +-- backend/danswer/llm/chat_llm.py | 2 +- backend/danswer/llm/llm_initialization.py | 2 +- backend/danswer/main.py | 38 +++++++-------- .../search/postprocessing/postprocessing.py | 2 +- .../danswer/search/retrieval/search_runner.py | 2 +- .../server/middleware/latency_logging.py | 2 +- .../server/query_and_chat/query_backend.py | 8 ++-- .../server/query_and_chat/token_limit.py | 2 +- backend/danswer/tools/built_in_tools.py | 12 ++--- backend/danswer/utils/logger.py | 46 +++++++++++-------- backend/danswer/utils/timing.py | 3 +- .../danswer/utils/variable_functionality.py | 2 +- backend/ee/danswer/auth/users.py | 2 +- .../ee/danswer/background/permission_sync.py | 2 +- .../server/enterprise_settings/store.py | 4 +- .../server/query_and_chat/chat_backend.py | 4 +- .../server/query_and_chat/query_backend.py | 4 +- backend/ee/danswer/server/saml.py | 2 +- backend/ee/danswer/server/seeding.py | 16 +++---- backend/model_server/custom_models.py | 4 +- backend/model_server/encoders.py | 4 +- backend/model_server/main.py | 16 +++---- backend/model_server/utils.py | 2 +- .../scripts/force_delete_connector_by_id.py | 16 +++---- backend/scripts/save_load_state.py | 6 +-- backend/shared_configs/configs.py | 2 +- 57 files changed, 183 insertions(+), 201 deletions(-) diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py index 319d066be..ce3b8e88f 100644 --- a/backend/danswer/auth/users.py +++ b/backend/danswer/auth/users.py @@ -81,7 +81,7 @@ def verify_auth_setting() -> None: "User must choose a valid user authentication method: " "disabled, basic, or google_oauth" ) - logger.info(f"Using Auth Type: {AUTH_TYPE.value}") + logger.notice(f"Using Auth Type: {AUTH_TYPE.value}") def get_display_email(email: str | None, space_less: bool = False) -> str: @@ -214,7 +214,7 @@ class UserManager(UUIDIDMixin, 
BaseUserManager[User, uuid.UUID]): async def on_after_register( self, user: User, request: Optional[Request] = None ) -> None: - logger.info(f"User {user.id} has registered.") + logger.notice(f"User {user.id} has registered.") optional_telemetry( record_type=RecordType.SIGN_UP, data={"action": "create"}, @@ -224,14 +224,14 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]): async def on_after_forgot_password( self, user: User, token: str, request: Optional[Request] = None ) -> None: - logger.info(f"User {user.id} has forgot their password. Reset token: {token}") + logger.notice(f"User {user.id} has forgotten their password. Reset token: {token}") async def on_after_request_verify( self, user: User, token: str, request: Optional[Request] = None ) -> None: verify_email_domain(user.email) - logger.info( + logger.notice( f"Verification requested for user {user.id}. Verification token: {token}" ) diff --git a/backend/danswer/background/celery/celery_app.py b/backend/danswer/background/celery/celery_app.py index e6507a8ba..1a185b62e 100644 --- a/backend/danswer/background/celery/celery_app.py +++ b/backend/danswer/background/celery/celery_app.py @@ -282,7 +282,7 @@ def check_for_cc_pair_deletion_task() -> None: cc_pairs = get_connector_credential_pairs(db_session) for cc_pair in cc_pairs: if should_kick_off_deletion_of_cc_pair(cc_pair, db_session): - logger.info(f"Deleting the {cc_pair.name} connector credential pair") + logger.notice(f"Deleting the {cc_pair.name} connector credential pair") cleanup_connector_credential_pair_task.apply_async( kwargs=dict( connector_id=cc_pair.connector.id, diff --git a/backend/danswer/background/celery/celery_utils.py b/backend/danswer/background/celery/celery_utils.py index 772862724..acc4ae8f6 100644 --- a/backend/danswer/background/celery/celery_utils.py +++ b/backend/danswer/background/celery/celery_utils.py @@ -92,7 +92,7 @@ def should_sync_doc_set(document_set: DocumentSet, db_session: Session) -> bool: logger.info(f"Document set '{document_set.id}' is already syncing. Skipping.") return False - logger.info(f"Document set {document_set.id} syncing now!") + logger.info(f"Document set {document_set.id} syncing now.") return True diff --git a/backend/danswer/background/connector_deletion.py b/backend/danswer/background/connector_deletion.py index 9c653a87f..908835649 100644 --- a/backend/danswer/background/connector_deletion.py +++ b/backend/danswer/background/connector_deletion.py @@ -185,11 +185,11 @@ def delete_connector_credential_pair( connector_id=connector_id, ) if not connector or not len(connector.credentials): - logger.debug("Found no credentials left for connector, deleting connector") + logger.info("Found no credentials left for connector, deleting connector") db_session.delete(connector) db_session.commit() - logger.info( + logger.notice( "Successfully deleted connector_credential_pair with connector_id:" f" '{connector_id}' and credential_id: '{credential_id}'. Deleted {num_docs_deleted} docs."
) diff --git a/backend/danswer/background/indexing/run_indexing.py b/backend/danswer/background/indexing/run_indexing.py index 2e4cc71e8..d51c13a91 100644 --- a/backend/danswer/background/indexing/run_indexing.py +++ b/backend/danswer/background/indexing/run_indexing.py @@ -160,7 +160,7 @@ def _run_indexing( ) if INDEXING_TRACER_INTERVAL > 0: - logger.info(f"Memory tracer starting: interval={INDEXING_TRACER_INTERVAL}") + logger.debug(f"Memory tracer starting: interval={INDEXING_TRACER_INTERVAL}") tracer = DanswerTracer() tracer.start() tracer.snap() @@ -269,7 +269,7 @@ def _run_indexing( INDEXING_TRACER_INTERVAL > 0 and tracer_counter % INDEXING_TRACER_INTERVAL == 0 ): - logger.info( + logger.debug( f"Running trace comparison for batch {tracer_counter}. interval={INDEXING_TRACER_INTERVAL}" ) tracer.snap() @@ -285,7 +285,7 @@ def _run_indexing( run_dt=run_end_dt, ) except Exception as e: - logger.info( + logger.exception( f"Connector run ran into exception after elapsed time: {time.time() - start_time} seconds" ) # Only mark the attempt as a complete failure if this is the first indexing window. @@ -323,13 +323,13 @@ def _run_indexing( break if INDEXING_TRACER_INTERVAL > 0: - logger.info( + logger.debug( f"Running trace comparison between start and end of indexing. {tracer_counter} batches processed." ) tracer.snap() tracer.log_first_diff(INDEXING_TRACER_NUM_PRINT_ENTRIES) tracer.stop() - logger.info("Memory tracer stopped.") + logger.debug("Memory tracer stopped.") if ( index_attempt_md.num_exceptions > 0 diff --git a/backend/danswer/background/indexing/tracer.py b/backend/danswer/background/indexing/tracer.py index d91775567..baad96230 100644 --- a/backend/danswer/background/indexing/tracer.py +++ b/backend/danswer/background/indexing/tracer.py @@ -48,9 +48,9 @@ class DanswerTracer: stats = self.snapshot.statistics("traceback") for s in stats[:numEntries]: - logger.info(f"Tracer snap: {s}") + logger.debug(f"Tracer snap: {s}") for line in s.traceback: - logger.info(f"* {line}") + logger.debug(f"* {line}") @staticmethod def log_diff( @@ -60,9 +60,9 @@ class DanswerTracer: ) -> None: stats = snap_current.compare_to(snap_previous, "traceback") for s in stats[:numEntries]: - logger.info(f"Tracer diff: {s}") + logger.debug(f"Tracer diff: {s}") for line in s.traceback.format(): - logger.info(f"* {line}") + logger.debug(f"* {line}") def log_previous_diff(self, numEntries: int) -> None: if not self.snapshot or not self.snapshot_prev: diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index e05cfbca6..b2cf0e6d1 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -380,7 +380,7 @@ def update_loop( # batch of documents indexed if db_embedding_model.cloud_provider_id is None: - logger.debug("Running a first inference to warm up embedding model") + logger.notice("Running a first inference to warm up embedding model") warm_up_bi_encoder( embedding_model=db_embedding_model, model_server_host=INDEXING_MODEL_SERVER_HOST, @@ -447,7 +447,7 @@ def update__main() -> None: set_is_ee_based_on_env_variable() init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME) - logger.info("Starting indexing service") + logger.notice("Starting indexing service") update_loop() diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 408ffb2f1..f30f8c0f9 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -13,6 +13,10 @@ DEFAULT_BOOST = 0 SESSION_KEY = "session" +# Used for 
logging +SLACK_CHANNEL_ID = "channel_id" + + # For chunking/processing chunks RETURN_SEPARATOR = "\n\r\n" SECTION_SEPARATOR = "\n\n" diff --git a/backend/danswer/connectors/blob/connector.py b/backend/danswer/connectors/blob/connector.py index b72941301..a664a3d76 100644 --- a/backend/danswer/connectors/blob/connector.py +++ b/backend/danswer/connectors/blob/connector.py @@ -56,7 +56,7 @@ class BlobStorageConnector(LoadConnector, PollConnector): Raises ValueError for unsupported bucket types. """ - logger.info( + logger.debug( f"Loading credentials for {self.bucket_name} or type {self.bucket_type}" ) @@ -220,7 +220,7 @@ class BlobStorageConnector(LoadConnector, PollConnector): yield batch def load_from_state(self) -> GenerateDocumentsOutput: - logger.info("Loading blob objects") + logger.debug("Loading blob objects") return self._yield_blob_objects( start=datetime(1970, 1, 1, tzinfo=timezone.utc), end=datetime.now(timezone.utc), diff --git a/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py b/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py index 8733ca66e..e3eeaaf61 100644 --- a/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py +++ b/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py @@ -56,7 +56,7 @@ class _RateLimitDecorator: sleep_cnt = 0 while len(self.call_history) == self.max_calls: sleep_time = self.sleep_time * (self.sleep_backoff**sleep_cnt) - logger.info( + logger.notice( f"Rate limit exceeded for function {func.__name__}. " f"Waiting {sleep_time} seconds before retrying." ) diff --git a/backend/danswer/connectors/github/connector.py b/backend/danswer/connectors/github/connector.py index 89e5de551..aa72a3bef 100644 --- a/backend/danswer/connectors/github/connector.py +++ b/backend/danswer/connectors/github/connector.py @@ -38,7 +38,7 @@ def _sleep_after_rate_limit_exception(github_client: Github) -> None: tzinfo=timezone.utc ) - datetime.now(tz=timezone.utc) sleep_time += timedelta(minutes=1) # add an extra minute just to be safe - logger.info(f"Ran into Github rate-limit. Sleeping {sleep_time.seconds} seconds.") + logger.notice(f"Ran into Github rate-limit. 
Sleeping {sleep_time.seconds} seconds.") time.sleep(sleep_time.seconds) diff --git a/backend/danswer/connectors/gmail/connector_auth.py b/backend/danswer/connectors/gmail/connector_auth.py index 9d01ac5f4..e518d5a50 100644 --- a/backend/danswer/connectors/gmail/connector_auth.py +++ b/backend/danswer/connectors/gmail/connector_auth.py @@ -50,7 +50,7 @@ def get_gmail_creds_for_authorized_user( try: creds.refresh(Request()) if creds.valid: - logger.info("Refreshed Gmail tokens.") + logger.notice("Refreshed Gmail tokens.") return creds except Exception as e: logger.exception(f"Failed to refresh gmail access token due to: {e}") diff --git a/backend/danswer/connectors/gong/connector.py b/backend/danswer/connectors/gong/connector.py index 9ad24a5c9..56c93f57d 100644 --- a/backend/danswer/connectors/gong/connector.py +++ b/backend/danswer/connectors/gong/connector.py @@ -81,10 +81,10 @@ class GongConnector(LoadConnector, PollConnector): for workspace in workspace_list: if workspace: - logger.info(f"Updating workspace: {workspace}") + logger.info(f"Updating Gong workspace: {workspace}") workspace_id = workspace_map.get(workspace) if not workspace_id: - logger.error(f"Invalid workspace: {workspace}") + logger.error(f"Invalid Gong workspace: {workspace}") if not self.continue_on_fail: raise ValueError(f"Invalid workspace: {workspace}") continue diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py index 34685c1cb..40a9b7343 100644 --- a/backend/danswer/connectors/google_drive/connector.py +++ b/backend/danswer/connectors/google_drive/connector.py @@ -267,7 +267,7 @@ def get_all_files_batched( yield from batch_generator( items=found_files, batch_size=batch_size, - pre_batch_yield=lambda batch_files: logger.info( + pre_batch_yield=lambda batch_files: logger.debug( f"Parseable Documents in batch: {[file['name'] for file in batch_files]}" ), ) diff --git a/backend/danswer/connectors/google_drive/connector_auth.py b/backend/danswer/connectors/google_drive/connector_auth.py index 2aae8401a..ae08eca7f 100644 --- a/backend/danswer/connectors/google_drive/connector_auth.py +++ b/backend/danswer/connectors/google_drive/connector_auth.py @@ -50,7 +50,7 @@ def get_google_drive_creds_for_authorized_user( try: creds.refresh(Request()) if creds.valid: - logger.info("Refreshed Google Drive tokens.") + logger.notice("Refreshed Google Drive tokens.") return creds except Exception as e: logger.exception(f"Failed to refresh google drive access token due to: {e}") diff --git a/backend/danswer/danswerbot/slack/constants.py b/backend/danswer/danswerbot/slack/constants.py index 5eaf63c89..cf2b38032 100644 --- a/backend/danswer/danswerbot/slack/constants.py +++ b/backend/danswer/danswerbot/slack/constants.py @@ -6,7 +6,6 @@ FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID = "feedback-doc-button" IMMEDIATE_RESOLVED_BUTTON_ACTION_ID = "immediate-resolved-button" FOLLOWUP_BUTTON_ACTION_ID = "followup-button" FOLLOWUP_BUTTON_RESOLVED_ACTION_ID = "followup-resolved-button" -SLACK_CHANNEL_ID = "channel_id" VIEW_DOC_FEEDBACK_ID = "view-doc-feedback" GENERATE_ANSWER_BUTTON_ACTION_ID = "generate-answer-button" diff --git a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py index b7b7bea9f..732be8df9 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py @@ -1,4 +1,3 @@ -import logging from typing import Any from typing import 
cast @@ -134,7 +133,7 @@ def handle_generate_answer_button( receiver_ids=None, client=client.web_client, channel=channel_id, - logger=cast(logging.Logger, logger), + logger=logger, feedback_reminder_id=None, ) diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py index cd97af968..e94786a77 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_message.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py @@ -1,15 +1,13 @@ import datetime -import logging -from typing import cast from slack_sdk import WebClient from slack_sdk.errors import SlackApiError from sqlalchemy.orm import Session +from danswer.configs.constants import SLACK_CHANNEL_ID from danswer.configs.danswerbot_configs import DANSWER_BOT_FEEDBACK_REMINDER from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI from danswer.danswerbot.slack.blocks import get_feedback_reminder_blocks -from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.handlers.handle_regular_answer import ( handle_regular_answer, ) @@ -17,7 +15,6 @@ from danswer.danswerbot.slack.handlers.handle_standard_answers import ( handle_standard_answers, ) from danswer.danswerbot.slack.models import SlackMessageInfo -from danswer.danswerbot.slack.utils import ChannelIdAdapter from danswer.danswerbot.slack.utils import fetch_user_ids_from_emails from danswer.danswerbot.slack.utils import fetch_user_ids_from_groups from danswer.danswerbot.slack.utils import respond_in_thread @@ -53,12 +50,8 @@ def send_msg_ack_to_user(details: SlackMessageInfo, client: WebClient) -> None: def schedule_feedback_reminder( details: SlackMessageInfo, include_followup: bool, client: WebClient ) -> str | None: - logger = cast( - logging.Logger, - ChannelIdAdapter( - logger_base, extra={SLACK_CHANNEL_ID: details.channel_to_respond} - ), - ) + logger = setup_logger(extra={SLACK_CHANNEL_ID: details.channel_to_respond}) + if not DANSWER_BOT_FEEDBACK_REMINDER: logger.info("Scheduled feedback reminder disabled...") return None @@ -97,10 +90,7 @@ def schedule_feedback_reminder( def remove_scheduled_feedback_reminder( client: WebClient, channel: str | None, msg_id: str ) -> None: - logger = cast( - logging.Logger, - ChannelIdAdapter(logger_base, extra={SLACK_CHANNEL_ID: channel}), - ) + logger = setup_logger(extra={SLACK_CHANNEL_ID: channel}) try: client.chat_deleteScheduledMessage( @@ -129,10 +119,7 @@ def handle_message( """ channel = message_info.channel_to_respond - logger = cast( - logging.Logger, - ChannelIdAdapter(logger_base, extra={SLACK_CHANNEL_ID: channel}), - ) + logger = setup_logger(extra={SLACK_CHANNEL_ID: channel}) messages = message_info.thread_messages sender_id = message_info.sender diff --git a/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py b/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py index 74eac009c..b7a3b818c 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py @@ -1,5 +1,4 @@ import functools -import logging from collections.abc import Callable from typing import Any from typing import cast @@ -51,6 +50,7 @@ from danswer.search.enums import OptionalSearchSetting from danswer.search.models import BaseFilters from danswer.search.models import RetrievalDetails from danswer.search.search_settings import get_search_settings +from danswer.utils.logger import DanswerLoggingAdapter srl = 
SlackRateLimiter() @@ -83,7 +83,7 @@ def handle_regular_answer( receiver_ids: list[str] | None, client: WebClient, channel: str, - logger: logging.Logger, + logger: DanswerLoggingAdapter, feedback_reminder_id: str | None, num_retries: int = DANSWER_BOT_NUM_RETRIES, answer_generation_timeout: int = DANSWER_BOT_ANSWER_GENERATION_TIMEOUT, @@ -136,7 +136,6 @@ def handle_regular_answer( tries=num_retries, delay=0.25, backoff=2, - logger=logger, ) @rate_limits(client=client, channel=channel, thread_ts=message_ts_to_respond_to) def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | None: @@ -319,7 +318,7 @@ def handle_regular_answer( ) if answer.answer_valid is False: - logger.info( + logger.notice( "Answer was evaluated to be invalid, throwing it away without responding." ) update_emote_react( @@ -357,7 +356,7 @@ def handle_regular_answer( return True if not answer.answer and disable_docs_only_answer: - logger.info( + logger.notice( "Unable to find answer - not responding since the " "`DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER` env variable is set" ) diff --git a/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py b/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py index 244df401d..8e1663c1a 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py @@ -1,5 +1,3 @@ -import logging - from slack_sdk import WebClient from sqlalchemy.orm import Session @@ -21,6 +19,7 @@ from danswer.db.models import SlackBotConfig from danswer.db.standard_answer import fetch_standard_answer_categories_by_names from danswer.db.standard_answer import find_matching_standard_answers from danswer.server.manage.models import StandardAnswer +from danswer.utils.logger import DanswerLoggingAdapter from danswer.utils.logger import setup_logger logger = setup_logger() @@ -61,7 +60,7 @@ def handle_standard_answers( receiver_ids: list[str] | None, slack_bot_config: SlackBotConfig | None, prompt: Prompt | None, - logger: logging.Logger, + logger: DanswerLoggingAdapter, client: WebClient, db_session: Session, ) -> bool: diff --git a/backend/danswer/danswerbot/slack/listener.py b/backend/danswer/danswerbot/slack/listener.py index af080d023..90aca4f68 100644 --- a/backend/danswer/danswerbot/slack/listener.py +++ b/backend/danswer/danswerbot/slack/listener.py @@ -10,6 +10,7 @@ from slack_sdk.socket_mode.response import SocketModeResponse from sqlalchemy.orm import Session from danswer.configs.constants import MessageType +from danswer.configs.constants import SLACK_CHANNEL_ID from danswer.configs.danswerbot_configs import DANSWER_BOT_REPHRASE_MESSAGE from danswer.configs.danswerbot_configs import DANSWER_BOT_RESPOND_EVERY_CHANNEL from danswer.configs.danswerbot_configs import NOTIFY_SLACKBOT_NO_ANSWER @@ -21,7 +22,6 @@ from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_I from danswer.danswerbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID -from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.constants import VIEW_DOC_FEEDBACK_ID from danswer.danswerbot.slack.handlers.handle_buttons import handle_doc_feedback_button from danswer.danswerbot.slack.handlers.handle_buttons import handle_followup_button @@ -39,7 +39,6 @@ from 
danswer.danswerbot.slack.handlers.handle_message import ( from danswer.danswerbot.slack.handlers.handle_message import schedule_feedback_reminder from danswer.danswerbot.slack.models import SlackMessageInfo from danswer.danswerbot.slack.tokens import fetch_tokens -from danswer.danswerbot.slack.utils import ChannelIdAdapter from danswer.danswerbot.slack.utils import decompose_action_id from danswer.danswerbot.slack.utils import get_channel_name_from_id from danswer.danswerbot.slack.utils import get_danswer_bot_app_id @@ -84,18 +83,18 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool event = cast(dict[str, Any], req.payload.get("event", {})) msg = cast(str | None, event.get("text")) channel = cast(str | None, event.get("channel")) - channel_specific_logger = ChannelIdAdapter( - logger, extra={SLACK_CHANNEL_ID: channel} - ) + channel_specific_logger = setup_logger(extra={SLACK_CHANNEL_ID: channel}) # This should never happen, but we can't continue without a channel since # we can't send a response without it if not channel: - channel_specific_logger.error("Found message without channel - skipping") + channel_specific_logger.warning("Found message without channel - skipping") return False if not msg: - channel_specific_logger.error("Cannot respond to empty message - skipping") + channel_specific_logger.warning( + "Cannot respond to empty message - skipping" + ) return False if ( @@ -185,9 +184,8 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool if req.type == "slash_commands": # Verify that there's an associated channel channel = req.payload.get("channel_id") - channel_specific_logger = ChannelIdAdapter( - logger, extra={SLACK_CHANNEL_ID: channel} - ) + channel_specific_logger = setup_logger(extra={SLACK_CHANNEL_ID: channel}) + if not channel: channel_specific_logger.error( "Received DanswerBot command without channel - skipping" @@ -230,7 +228,7 @@ def process_feedback(req: SocketModeRequest, client: SocketModeClient) -> None: ) query_event_id, _, _ = decompose_action_id(feedback_id) - logger.info(f"Successfully handled QA feedback for event: {query_event_id}") + logger.notice(f"Successfully handled QA feedback for event: {query_event_id}") def build_request_details( @@ -247,15 +245,17 @@ def build_request_details( msg = remove_danswer_bot_tag(msg, client=client.web_client) if DANSWER_BOT_REPHRASE_MESSAGE: - logger.info(f"Rephrasing Slack message. Original message: {msg}") + logger.notice(f"Rephrasing Slack message. 
Original message: {msg}") try: msg = rephrase_slack_message(msg) - logger.info(f"Rephrased message: {msg}") + logger.notice(f"Rephrased message: {msg}") except Exception as e: logger.error(f"Error while trying to rephrase the Slack message: {e}") + else: + logger.notice(f"Received Slack message: {msg}") if tagged: - logger.info("User tagged DanswerBot") + logger.debug("User tagged DanswerBot") if thread_ts != message_ts and thread_ts is not None: thread_messages = read_slack_thread( @@ -437,7 +437,7 @@ def _initialize_socket_client(socket_client: SocketModeClient) -> None: socket_client.socket_mode_request_listeners.append(process_slack_event) # type: ignore # Establish a WebSocket connection to the Socket Mode servers - logger.info("Listening for messages from Slack...") + logger.notice("Listening for messages from Slack...") socket_client.connect() @@ -454,7 +454,7 @@ if __name__ == "__main__": slack_bot_tokens: SlackBotTokens | None = None socket_client: SocketModeClient | None = None - logger.info("Verifying query preprocessing (NLTK) data is downloaded") + logger.notice("Verifying query preprocessing (NLTK) data is downloaded") download_nltk_data() while True: @@ -463,7 +463,7 @@ if __name__ == "__main__": if latest_slack_bot_tokens != slack_bot_tokens: if slack_bot_tokens is not None: - logger.info("Slack Bot tokens have changed - reconnecting") + logger.notice("Slack Bot tokens have changed - reconnecting") else: # This happens on the very first time the listener process comes up # or the tokens have updated (set up for the first time) diff --git a/backend/danswer/danswerbot/slack/utils.py b/backend/danswer/danswerbot/slack/utils.py index b89b9f65f..b85565404 100644 --- a/backend/danswer/danswerbot/slack/utils.py +++ b/backend/danswer/danswerbot/slack/utils.py @@ -3,7 +3,6 @@ import random import re import string import time -from collections.abc import MutableMapping from typing import Any from typing import cast from typing import Optional @@ -25,7 +24,6 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES from danswer.connectors.slack.utils import make_slack_api_rate_limited from danswer.connectors.slack.utils import SlackTextCleaner from danswer.danswerbot.slack.constants import FeedbackVisibility -from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.tokens import fetch_tokens from danswer.db.engine import get_sqlalchemy_engine from danswer.db.users import get_user_by_email @@ -110,20 +108,6 @@ def remove_danswer_bot_tag(message_str: str, client: WebClient) -> str: return re.sub(rf"<@{bot_tag_id}>\s", "", message_str) -class ChannelIdAdapter(logging.LoggerAdapter): - """This is used to add the channel ID to all log messages - emitted in this file""" - - def process( - self, msg: str, kwargs: MutableMapping[str, Any] - ) -> tuple[str, MutableMapping[str, Any]]: - channel_id = self.extra.get(SLACK_CHANNEL_ID) if self.extra else None - if channel_id: - return f"[Channel ID: {channel_id}] {msg}", kwargs - else: - return msg, kwargs - - def get_web_client() -> WebClient: slack_tokens = fetch_tokens() return WebClient(token=slack_tokens.bot_token) diff --git a/backend/danswer/db/credentials.py b/backend/danswer/db/credentials.py index 433e63dd9..1d27e9c43 100644 --- a/backend/danswer/db/credentials.py +++ b/backend/danswer/db/credentials.py @@ -271,9 +271,9 @@ def delete_credential( ) if force: - logger.info(f"Force deleting credential {credential_id}") + logger.warning(f"Force deleting credential {credential_id}") else: - 
logger.info(f"Deleting credential {credential_id}") + logger.notice(f"Deleting credential {credential_id}") db_session.delete(credential) db_session.commit() diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py index c6104bbde..77ea4e3dd 100644 --- a/backend/danswer/db/document.py +++ b/backend/danswer/db/document.py @@ -335,7 +335,9 @@ def prepare_to_modify_documents( yield transaction break except OperationalError as e: - logger.info(f"Failed to acquire locks for documents, retrying. Error: {e}") + logger.warning( + f"Failed to acquire locks for documents, retrying. Error: {e}" + ) time.sleep(retry_delay) diff --git a/backend/danswer/document_index/vespa/chunk_retrieval.py b/backend/danswer/document_index/vespa/chunk_retrieval.py index 12f6de0c1..6a7427630 100644 --- a/backend/danswer/document_index/vespa/chunk_retrieval.py +++ b/backend/danswer/document_index/vespa/chunk_retrieval.py @@ -316,7 +316,7 @@ def query_vespa( response_json: dict[str, Any] = response.json() if LOG_VESPA_TIMING_INFORMATION: - logger.info("Vespa timing info: %s", response_json.get("timing")) + logger.debug("Vespa timing info: %s", response_json.get("timing")) hits = response_json["root"].get("children", []) for hit in hits: diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py index 26c34c499..c2beffc86 100644 --- a/backend/danswer/document_index/vespa/index.py +++ b/backend/danswer/document_index/vespa/index.py @@ -366,7 +366,7 @@ class VespaIndex(DocumentIndex): ) self._apply_updates_batched(processed_updates_requests) - logger.info( + logger.debug( "Finished updating Vespa documents in %.2f seconds", time.monotonic() - update_start, ) diff --git a/backend/danswer/file_processing/extract_file_text.py b/backend/danswer/file_processing/extract_file_text.py index f96d4a415..7143b4287 100644 --- a/backend/danswer/file_processing/extract_file_text.py +++ b/backend/danswer/file_processing/extract_file_text.py @@ -190,7 +190,7 @@ def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str: except Exception: logger.error("Unable to decrypt pdf") else: - logger.info("No Password available to to decrypt pdf") + logger.warning("No Password available to to decrypt pdf") if not decrypt_success: # By user request, keep files that are unreadable just so they diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py index 60faafe4d..db59a5c3d 100644 --- a/backend/danswer/indexing/indexing_pipeline.py +++ b/backend/danswer/indexing/indexing_pipeline.py @@ -166,7 +166,7 @@ def index_doc_batch_with_handler( index_attempt_metadata.num_exceptions += 1 if index_attempt_metadata.num_exceptions == INDEXING_EXCEPTION_LIMIT: - logger.info( + logger.warning( f"Maximum number of exceptions for this index attempt " f"({INDEXING_EXCEPTION_LIMIT}) has been reached. " f"The next exception will abort the indexing attempt." @@ -174,7 +174,7 @@ def index_doc_batch_with_handler( elif index_attempt_metadata.num_exceptions > INDEXING_EXCEPTION_LIMIT: logger.warning( f"Maximum number of exceptions for this index attempt " - f"({INDEXING_EXCEPTION_LIMIT}) has been exceeded. Raising RuntimeError." + f"({INDEXING_EXCEPTION_LIMIT}) has been exceeded." ) raise RuntimeError( f"Maximum exception limit of {INDEXING_EXCEPTION_LIMIT} exceeded." 
diff --git a/backend/danswer/llm/answering/answer.py b/backend/danswer/llm/answering/answer.py index 136f18fa6..a664db217 100644 --- a/backend/danswer/llm/answering/answer.py +++ b/backend/danswer/llm/answering/answer.py @@ -375,7 +375,7 @@ class Answer: else None ) - logger.info(f"Chosen tool: {chosen_tool_and_args}") + logger.notice(f"Chosen tool: {chosen_tool_and_args}") if not chosen_tool_and_args: prompt_builder.update_system_prompt( diff --git a/backend/danswer/llm/answering/prune_and_merge.py b/backend/danswer/llm/answering/prune_and_merge.py index b4bd4a348..43f592a83 100644 --- a/backend/danswer/llm/answering/prune_and_merge.py +++ b/backend/danswer/llm/answering/prune_and_merge.py @@ -190,7 +190,7 @@ def _apply_pruning( ): # If the section is just a little bit over, it is likely due to the additional tool message tokens # no need to record this, the content will be trimmed just in case - logger.info( + logger.warning( "Found more tokens in Section than expected, " "likely mismatch between embedding and LLM tokenizers. Trimming content..." ) diff --git a/backend/danswer/llm/answering/stream_processing/quotes_processing.py b/backend/danswer/llm/answering/stream_processing/quotes_processing.py index 4015903d3..74f37b852 100644 --- a/backend/danswer/llm/answering/stream_processing/quotes_processing.py +++ b/backend/danswer/llm/answering/stream_processing/quotes_processing.py @@ -170,11 +170,11 @@ def process_answer( logger.debug("No answer extracted from raw output") return DanswerAnswer(answer=None), DanswerQuotes(quotes=[]) - logger.info(f"Answer: {answer}") + logger.notice(f"Answer: {answer}") if not quote_strings: logger.debug("No quotes extracted from raw output") return DanswerAnswer(answer=answer), DanswerQuotes(quotes=[]) - logger.info(f"All quotes (including unmatched): {quote_strings}") + logger.debug(f"All quotes (including unmatched): {quote_strings}") quotes = match_quotes_to_docs(quote_strings, docs) logger.debug(f"Final quotes: {quotes}") @@ -197,7 +197,7 @@ def _extract_quotes_from_completed_token_stream( ) -> DanswerQuotes: answer, quotes = process_answer(model_output, context_docs, is_json_prompt) if answer: - logger.info(answer) + logger.notice(answer) elif model_output: logger.warning("Answer extraction from model output failed.") diff --git a/backend/danswer/llm/chat_llm.py b/backend/danswer/llm/chat_llm.py index 228807965..3a489a6c1 100644 --- a/backend/danswer/llm/chat_llm.py +++ b/backend/danswer/llm/chat_llm.py @@ -226,7 +226,7 @@ class DefaultMultiLLM(LLM): self._model_kwargs = model_kwargs def log_model_configs(self) -> None: - logger.info(f"Config: {self.config}") + logger.debug(f"Config: {self.config}") def _completion( self, diff --git a/backend/danswer/llm/llm_initialization.py b/backend/danswer/llm/llm_initialization.py index 3bd31d188..fef17ca81 100644 --- a/backend/danswer/llm/llm_initialization.py +++ b/backend/danswer/llm/llm_initialization.py @@ -75,6 +75,6 @@ def load_llm_providers(db_session: Session) -> None: ) llm_provider = upsert_llm_provider(db_session, llm_provider_request) update_default_provider(db_session, llm_provider.id) - logger.info( + logger.notice( f"Migrated LLM provider from env variables for provider '{GEN_AI_MODEL_PROVIDER}'" ) diff --git a/backend/danswer/main.py b/backend/danswer/main.py index 5e67bfd8d..99be2da0e 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -111,7 +111,6 @@ from danswer.tools.built_in_tools import auto_add_search_tool_to_personas from danswer.tools.built_in_tools import 
load_builtin_tools from danswer.tools.built_in_tools import refresh_built_in_tools_cache from danswer.utils.logger import setup_logger -from danswer.utils.logger import setup_uvicorn_logger from danswer.utils.telemetry import optional_telemetry from danswer.utils.telemetry import RecordType from danswer.utils.variable_functionality import fetch_versioned_implementation @@ -127,7 +126,6 @@ from shared_configs.enums import RerankerProvider logger = setup_logger() -setup_uvicorn_logger() def validation_exception_handler(request: Request, exc: Exception) -> JSONResponse: @@ -179,22 +177,22 @@ def include_router_with_global_prefix_prepended( def setup_postgres(db_session: Session) -> None: - logger.info("Verifying default connector/credential exist.") + logger.notice("Verifying default connector/credential exist.") create_initial_public_credential(db_session) create_initial_default_connector(db_session) associate_default_cc_pair(db_session) - logger.info("Verifying default standard answer category exists.") + logger.notice("Verifying default standard answer category exists.") create_initial_default_standard_answer_category(db_session) - logger.info("Loading LLM providers from env variables") + logger.notice("Loading LLM providers from env variables") load_llm_providers(db_session) - logger.info("Loading default Prompts and Personas") + logger.notice("Loading default Prompts and Personas") delete_old_default_personas(db_session) load_chat_yamls() - logger.info("Loading built-in tools") + logger.notice("Loading built-in tools") load_builtin_tools(db_session) refresh_built_in_tools_cache(db_session) auto_add_search_tool_to_personas(db_session) @@ -238,7 +236,7 @@ def setup_vespa( ) break except Exception: - logger.info(f"Waiting on Vespa, retrying in {wait_time} seconds...") + logger.notice(f"Waiting on Vespa, retrying in {wait_time} seconds...") time.sleep(wait_time) @@ -279,25 +277,27 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: # Expire all old embedding models indexing attempts, technically redundant cancel_indexing_attempts_past_model(db_session) - logger.info(f'Using Embedding model: "{db_embedding_model.model_name}"') + logger.notice(f'Using Embedding model: "{db_embedding_model.model_name}"') if db_embedding_model.query_prefix or db_embedding_model.passage_prefix: - logger.info(f'Query embedding prefix: "{db_embedding_model.query_prefix}"') - logger.info( + logger.notice( + f'Query embedding prefix: "{db_embedding_model.query_prefix}"' + ) + logger.notice( f'Passage embedding prefix: "{db_embedding_model.passage_prefix}"' ) search_settings = get_search_settings() if search_settings: if not search_settings.disable_rerank_for_streaming: - logger.info("Reranking is enabled.") + logger.notice("Reranking is enabled.") if search_settings.multilingual_expansion: - logger.info( + logger.notice( f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}." 
) else: if DEFAULT_CROSS_ENCODER_MODEL_NAME: - logger.info("Reranking is enabled.") + logger.notice("Reranking is enabled.") if not DEFAULT_CROSS_ENCODER_MODEL_NAME: raise ValueError("No reranking model specified.") search_settings = SavedSearchSettings( @@ -322,7 +322,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: if search_settings.rerank_model_name and not search_settings.provider_type: warm_up_cross_encoder(search_settings.rerank_model_name) - logger.info("Verifying query preprocessing (NLTK) data is downloaded") + logger.notice("Verifying query preprocessing (NLTK) data is downloaded") download_nltk_data() # setup Postgres with default credential, llm providers, etc. @@ -333,7 +333,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: mark_reindex_flag(db_session) # ensure Vespa is setup correctly - logger.info("Verifying Document Index(s) is/are available.") + logger.notice("Verifying Document Index(s) is/are available.") document_index = get_default_document_index( primary_index_name=db_embedding_model.index_name, secondary_index_name=secondary_db_embedding_model.index_name @@ -342,7 +342,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: ) setup_vespa(document_index, db_embedding_model, secondary_db_embedding_model) - logger.info(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}") + logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}") if db_embedding_model.cloud_provider_id is None: warm_up_bi_encoder( embedding_model=db_embedding_model, @@ -485,11 +485,11 @@ app = fetch_versioned_implementation(module="danswer.main", attribute="get_appli if __name__ == "__main__": - logger.info( + logger.notice( f"Starting Danswer Backend version {__version__} on http://{APP_HOST}:{str(APP_PORT)}/" ) if global_version.get_is_ee_version(): - logger.info("Running Enterprise Edition") + logger.notice("Running Enterprise Edition") uvicorn.run(app, host=APP_HOST, port=APP_PORT) diff --git a/backend/danswer/search/postprocessing/postprocessing.py b/backend/danswer/search/postprocessing/postprocessing.py index 373aac115..97ff1acdf 100644 --- a/backend/danswer/search/postprocessing/postprocessing.py +++ b/backend/danswer/search/postprocessing/postprocessing.py @@ -39,7 +39,7 @@ def _log_top_section_links(search_flow: str, sections: list[InferenceSection]) - else "No Link" for section in sections ] - logger.info(f"Top links from {search_flow} search: {', '.join(top_links)}") + logger.debug(f"Top links from {search_flow} search: {', '.join(top_links)}") def cleanup_chunks(chunks: list[InferenceChunkUncleaned]) -> list[InferenceChunk]: diff --git a/backend/danswer/search/retrieval/search_runner.py b/backend/danswer/search/retrieval/search_runner.py index 8da889b4b..669851ba8 100644 --- a/backend/danswer/search/retrieval/search_runner.py +++ b/backend/danswer/search/retrieval/search_runner.py @@ -265,7 +265,7 @@ def retrieve_chunks( top_chunks = combine_retrieval_results(parallel_search_results) if not top_chunks: - logger.info( + logger.warning( f"{query.search_type.value.capitalize()} search returned no results " f"with filters: {query.filters}" ) diff --git a/backend/danswer/server/middleware/latency_logging.py b/backend/danswer/server/middleware/latency_logging.py index f2bc3127a..ed269a545 100644 --- a/backend/danswer/server/middleware/latency_logging.py +++ b/backend/danswer/server/middleware/latency_logging.py @@ -16,7 +16,7 @@ def add_latency_logging_middleware(app: FastAPI, logger: logging.LoggerAdapter) start_time = time.monotonic() response 
= await call_next(request) process_time = time.monotonic() - start_time - logger.info( + logger.debug( f"Path: {request.url.path} - Method: {request.method} - " f"Status Code: {response.status_code} - Time: {process_time:.4f} secs" ) diff --git a/backend/danswer/server/query_and_chat/query_backend.py b/backend/danswer/server/query_and_chat/query_backend.py index a3b68fb73..71cf3092c 100644 --- a/backend/danswer/server/query_and_chat/query_backend.py +++ b/backend/danswer/server/query_and_chat/query_backend.py @@ -54,7 +54,7 @@ def admin_search( db_session: Session = Depends(get_session), ) -> AdminSearchResponse: query = question.query - logger.info(f"Received admin search query: {query}") + logger.notice(f"Received admin search query: {query}") user_acl_filters = build_access_filters_for_user(user, db_session) final_filters = IndexFilters( source_type=question.filters.source_type, @@ -122,7 +122,7 @@ def query_validation( # Note if weak model prompt is chosen, this check does not occur and will simply return that # the query is valid, this is because weaker models cannot really handle this task well. # Additionally, some weak model servers cannot handle concurrent inferences. - logger.info(f"Validating query: {simple_query.query}") + logger.notice(f"Validating query: {simple_query.query}") reasoning, answerable = get_query_answerability(simple_query.query) return QueryValidationResponse(reasoning=reasoning, answerable=answerable) @@ -231,7 +231,7 @@ def stream_query_validation( # Note if weak model prompt is chosen, this check does not occur and will simply return that # the query is valid, this is because weaker models cannot really handle this task well. # Additionally, some weak model servers cannot handle concurrent inferences. - logger.info(f"Validating query: {simple_query.query}") + logger.notice(f"Validating query: {simple_query.query}") return StreamingResponse( stream_query_answerability(simple_query.query), media_type="application/json" ) @@ -245,7 +245,7 @@ def get_answer_with_quote( ) -> StreamingResponse: query = query_request.messages[0].message - logger.info(f"Received query for one shot answer with quotes: {query}") + logger.notice(f"Received query for one shot answer with quotes: {query}") packets = stream_search_answer( query_req=query_request, diff --git a/backend/danswer/server/query_and_chat/token_limit.py b/backend/danswer/server/query_and_chat/token_limit.py index b44eec3a6..3f5d76bac 100644 --- a/backend/danswer/server/query_and_chat/token_limit.py +++ b/backend/danswer/server/query_and_chat/token_limit.py @@ -123,7 +123,7 @@ def _is_rate_limited( def any_rate_limit_exists() -> bool: """Checks if any rate limit exists in the database. 
Is cached, so that if no rate limits are setup, we don't have any effect on average query latency.""" - logger.info("Checking for any rate limits...") + logger.debug("Checking for any rate limits...") with get_session_context_manager() as db_session: return ( db_session.scalar( diff --git a/backend/danswer/tools/built_in_tools.py b/backend/danswer/tools/built_in_tools.py index 68fae10c0..59159e717 100644 --- a/backend/danswer/tools/built_in_tools.py +++ b/backend/danswer/tools/built_in_tools.py @@ -77,7 +77,7 @@ def load_builtin_tools(db_session: Session) -> None: tool.name = tool_name tool.description = tool_info["description"] tool.display_name = tool_info["display_name"] - logger.info(f"Updated tool: {tool_name}") + logger.notice(f"Updated tool: {tool_name}") else: # Add new tool new_tool = ToolDBModel( @@ -87,17 +87,17 @@ def load_builtin_tools(db_session: Session) -> None: in_code_tool_id=tool_info["in_code_tool_id"], ) db_session.add(new_tool) - logger.info(f"Added new tool: {tool_name}") + logger.notice(f"Added new tool: {tool_name}") # Remove tools that are no longer in BUILT_IN_TOOLS built_in_ids = {tool_info["in_code_tool_id"] for tool_info in BUILT_IN_TOOLS} for tool_id, tool in list(in_code_tool_id_to_tool.items()): if tool_id not in built_in_ids: db_session.delete(tool) - logger.info(f"Removed tool no longer in built-in list: {tool.name}") + logger.notice(f"Removed tool no longer in built-in list: {tool.name}") db_session.commit() - logger.info("All built-in tools are loaded/verified.") + logger.notice("All built-in tools are loaded/verified.") def auto_add_search_tool_to_personas(db_session: Session) -> None: @@ -140,11 +140,11 @@ def auto_add_search_tool_to_personas(db_session: Session) -> None: for persona in personas_to_update: if search_tool not in persona.tools: persona.tools.append(search_tool) - logger.info(f"Added SearchTool to Persona ID: {persona.id}") + logger.notice(f"Added SearchTool to Persona ID: {persona.id}") # Commit changes to the database db_session.commit() - logger.info("Completed adding SearchTool to relevant Personas.") + logger.notice("Completed adding SearchTool to relevant Personas.") _built_in_tools_cache: dict[int, Type[Tool]] | None = None diff --git a/backend/danswer/utils/logger.py b/backend/danswer/utils/logger.py index 19fb14ae7..400b3a332 100644 --- a/backend/danswer/utils/logger.py +++ b/backend/danswer/utils/logger.py @@ -1,8 +1,10 @@ import logging import os from collections.abc import MutableMapping +from logging.handlers import RotatingFileHandler from typing import Any +from danswer.configs.constants import SLACK_CHANNEL_ID from shared_configs.configs import DEV_LOGGING_ENABLED from shared_configs.configs import LOG_FILE_NAME from shared_configs.configs import LOG_LEVEL @@ -48,14 +50,21 @@ class DanswerLoggingAdapter(logging.LoggerAdapter): # If this is an indexing job, add the attempt ID to the log message # This helps filter the logs for this specific indexing attempt_id = IndexAttemptSingleton.get_index_attempt_id() - if attempt_id is None: - return msg, kwargs + if attempt_id is not None: + msg = f"[Attempt ID: {attempt_id}] {msg}" - return f"[Attempt ID: {attempt_id}] {msg}", kwargs + # For Slack Bot, logs the channel relevant to the request + channel_id = self.extra.get(SLACK_CHANNEL_ID) if self.extra else None + if channel_id: + msg = f"[Channel ID: {channel_id}] {msg}" - def notice(self, msg: str, *args: Any, **kwargs: Any) -> None: + return msg, kwargs + + def notice(self, msg: Any, *args: Any, **kwargs: Any) -> None: # 
Stacklevel is set to 2 to point to the actual caller of notice instead of here - self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs, stacklevel=2) + self.log( + logging.getLevelName("NOTICE"), str(msg), *args, **kwargs, stacklevel=2 + ) class ColoredFormatter(logging.Formatter): @@ -95,12 +104,13 @@ def get_standard_formatter() -> ColoredFormatter: def setup_logger( name: str = __name__, log_level: int = get_log_level_from_str(), + extra: MutableMapping[str, Any] | None = None, ) -> DanswerLoggingAdapter: logger = logging.getLogger(name) # If the logger already has handlers, assume it was already configured and return it. if logger.handlers: - return DanswerLoggingAdapter(logger) + return DanswerLoggingAdapter(logger, extra=extra) logger.setLevel(log_level) @@ -112,6 +122,12 @@ def setup_logger( logger.addHandler(handler) + uvicorn_logger = logging.getLogger("uvicorn.access") + if uvicorn_logger: + uvicorn_logger.handlers = [] + uvicorn_logger.addHandler(handler) + uvicorn_logger.setLevel(log_level) + is_containerized = os.path.exists("/.dockerenv") if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED): log_levels = ["debug", "info", "notice"] @@ -121,7 +137,7 @@ def setup_logger( if is_containerized else f"./log/{LOG_FILE_NAME}_{level}.log" ) - file_handler = logging.handlers.RotatingFileHandler( + file_handler = RotatingFileHandler( file_name, maxBytes=25 * 1024 * 1024, # 25 MB backupCount=5, # Keep 5 backup files @@ -130,17 +146,9 @@ def setup_logger( file_handler.setFormatter(formatter) logger.addHandler(file_handler) + if uvicorn_logger: + uvicorn_logger.addHandler(file_handler) + logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs) # type: ignore - return DanswerLoggingAdapter(logger) - - -def setup_uvicorn_logger() -> None: - logger = logging.getLogger("uvicorn.access") - handler = logging.StreamHandler() - handler.setLevel(get_log_level_from_str(LOG_LEVEL)) - - formatter = get_standard_formatter() - handler.setFormatter(formatter) - - logger.handlers = [handler] + return DanswerLoggingAdapter(logger, extra=extra) diff --git a/backend/danswer/utils/timing.py b/backend/danswer/utils/timing.py index 8e240278d..0d4eb7a14 100644 --- a/backend/danswer/utils/timing.py +++ b/backend/danswer/utils/timing.py @@ -37,7 +37,8 @@ def log_function_time( if debug_only: logger.debug(final_log) else: - logger.info(final_log) + # These are generally more important logs so the level is a bit higher + logger.notice(final_log) if not print_only: optional_telemetry( diff --git a/backend/danswer/utils/variable_functionality.py b/backend/danswer/utils/variable_functionality.py index 3162869d8..f230d9e2f 100644 --- a/backend/danswer/utils/variable_functionality.py +++ b/backend/danswer/utils/variable_functionality.py @@ -25,7 +25,7 @@ global_version = DanswerVersion() def set_is_ee_based_on_env_variable() -> None: if ENTERPRISE_EDITION_ENABLED and not global_version.get_is_ee_version(): - logger.info("Enterprise Edition enabled") + logger.notice("Enterprise Edition enabled") global_version.set_ee() diff --git a/backend/ee/danswer/auth/users.py b/backend/ee/danswer/auth/users.py index e66953fbd..18dff6ab0 100644 --- a/backend/ee/danswer/auth/users.py +++ b/backend/ee/danswer/auth/users.py @@ -19,7 +19,7 @@ logger = setup_logger() def verify_auth_setting() -> None: # All the Auth flows are valid for EE version - logger.info(f"Using Auth Type: {AUTH_TYPE.value}") + logger.notice(f"Using Auth Type: {AUTH_TYPE.value}") async def 
optional_user_(
diff --git a/backend/ee/danswer/background/permission_sync.py b/backend/ee/danswer/background/permission_sync.py
index 300ee5c35..c14094b60 100644
--- a/backend/ee/danswer/background/permission_sync.py
+++ b/backend/ee/danswer/background/permission_sync.py
@@ -215,7 +215,7 @@ def permission_loop(delay: int = 60, num_workers: int = NUM_PERMISSION_WORKERS)
 
 
 def update__main() -> None:
-    logger.info("Starting Permission Syncing Loop")
+    logger.notice("Starting Permission Syncing Loop")
     init_sqlalchemy_engine(POSTGRES_PERMISSIONS_APP_NAME)
     permission_loop()
 
diff --git a/backend/ee/danswer/server/enterprise_settings/store.py b/backend/ee/danswer/server/enterprise_settings/store.py
index 128661cfd..37dd320d7 100644
--- a/backend/ee/danswer/server/enterprise_settings/store.py
+++ b/backend/ee/danswer/server/enterprise_settings/store.py
@@ -85,7 +85,7 @@ def upload_logo(
     content: IO[Any]
 
     if isinstance(file, str):
-        logger.info(f"Uploading logo from local path {file}")
+        logger.notice(f"Uploading logo from local path {file}")
         if not os.path.isfile(file) or not is_valid_file_type(file):
             logger.error(
                 "Invalid file type- only .png, .jpg, and .jpeg files are allowed"
@@ -99,7 +99,7 @@ def upload_logo(
         file_type = guess_file_type(file)
 
     else:
-        logger.info("Uploading logo from uploaded file")
+        logger.notice("Uploading logo from uploaded file")
         if not file.filename or not is_valid_file_type(file.filename):
             raise HTTPException(
                 status_code=400,
diff --git a/backend/ee/danswer/server/query_and_chat/chat_backend.py b/backend/ee/danswer/server/query_and_chat/chat_backend.py
index 7d76850e1..80f939cf7 100644
--- a/backend/ee/danswer/server/query_and_chat/chat_backend.py
+++ b/backend/ee/danswer/server/query_and_chat/chat_backend.py
@@ -72,7 +72,7 @@ def handle_simplified_chat_message(
     db_session: Session = Depends(get_session),
 ) -> ChatBasicResponse:
     """This is a Non-Streaming version that only gives back a minimal set of information"""
-    logger.info(f"Received new simple api chat message: {chat_message_req.message}")
+    logger.notice(f"Received new simple api chat message: {chat_message_req.message}")
 
     if not chat_message_req.message:
         raise HTTPException(status_code=400, detail="Empty chat message is invalid")
@@ -170,7 +170,7 @@ def handle_send_message_simple_with_history(
     query = req.messages[-1].message
     msg_history = req.messages[:-1]
 
-    logger.info(f"Received new simple with history chat message: {query}")
+    logger.notice(f"Received new simple with history chat message: {query}")
 
     user_id = user.id if user is not None else None
     chat_session = create_chat_session(
diff --git a/backend/ee/danswer/server/query_and_chat/query_backend.py b/backend/ee/danswer/server/query_and_chat/query_backend.py
index 30e7ee159..f6599abea 100644
--- a/backend/ee/danswer/server/query_and_chat/query_backend.py
+++ b/backend/ee/danswer/server/query_and_chat/query_backend.py
@@ -51,7 +51,7 @@ def handle_search_request(
 ) -> DocumentSearchResponse:
     """Simple search endpoint, does not create a new message or records in the DB"""
     query = search_request.message
-    logger.info(f"Received document search query: {query}")
+    logger.notice(f"Received document search query: {query}")
 
     llm, fast_llm = get_default_llms()
     search_pipeline = SearchPipeline(
@@ -130,7 +130,7 @@ def get_answer_with_quote(
     db_session: Session = Depends(get_session),
 ) -> OneShotQAResponse:
     query = query_request.messages[0].message
-    logger.info(f"Received query for one shot answer API with quotes: {query}")
+    logger.notice(f"Received query for one shot answer API with quotes: {query}")
 
     persona = get_persona_by_id(
         persona_id=query_request.persona_id,
diff --git a/backend/ee/danswer/server/saml.py b/backend/ee/danswer/server/saml.py
index d09e978ce..5bc62e98d 100644
--- a/backend/ee/danswer/server/saml.py
+++ b/backend/ee/danswer/server/saml.py
@@ -50,7 +50,7 @@ async def upsert_saml_user(email: str) -> User:
     try:
         return await user_manager.get_by_email(email)
     except exceptions.UserNotExists:
-        logger.info("Creating user from SAML login")
+        logger.notice("Creating user from SAML login")
 
     user_count = await get_user_count()
     role = UserRole.ADMIN if user_count == 0 else UserRole.BASIC
diff --git a/backend/ee/danswer/server/seeding.py b/backend/ee/danswer/server/seeding.py
index 9dc29e2ca..bbca5acc2 100644
--- a/backend/ee/danswer/server/seeding.py
+++ b/backend/ee/danswer/server/seeding.py
@@ -49,7 +49,7 @@ def _seed_llms(
     db_session: Session, llm_upsert_requests: list[LLMProviderUpsertRequest]
 ) -> None:
     if llm_upsert_requests:
-        logger.info("Seeding LLMs")
+        logger.notice("Seeding LLMs")
         seeded_providers = [
             upsert_llm_provider(db_session, llm_upsert_request)
             for llm_upsert_request in llm_upsert_requests
@@ -59,7 +59,7 @@ def _seed_llms(
 
 def _seed_personas(db_session: Session, personas: list[CreatePersonaRequest]) -> None:
     if personas:
-        logger.info("Seeding Personas")
+        logger.notice("Seeding Personas")
         for persona in personas:
             upsert_persona(
                 user=None,  # Seeding is done as admin
@@ -83,31 +83,31 @@ def _seed_personas(db_session: Session, personas: list[CreatePersonaRequest]) -> None:
 
 
 def _seed_settings(settings: Settings) -> None:
-    logger.info("Seeding Settings")
+    logger.notice("Seeding Settings")
     try:
         settings.check_validity()
         store_base_settings(settings)
-        logger.info("Successfully seeded Settings")
+        logger.notice("Successfully seeded Settings")
     except ValueError as e:
         logger.error(f"Failed to seed Settings: {str(e)}")
 
 
 def _seed_enterprise_settings(seed_config: SeedConfiguration) -> None:
     if seed_config.enterprise_settings is not None:
-        logger.info("Seeding enterprise settings")
+        logger.notice("Seeding enterprise settings")
         store_ee_settings(seed_config.enterprise_settings)
 
 
 def _seed_logo(db_session: Session, logo_path: str | None) -> None:
     if logo_path:
-        logger.info("Uploading logo")
+        logger.notice("Uploading logo")
         upload_logo(db_session=db_session, file=logo_path)
 
 
 def _seed_analytics_script(seed_config: SeedConfiguration) -> None:
     custom_analytics_secret_key = os.environ.get("CUSTOM_ANALYTICS_SECRET_KEY")
     if seed_config.analytics_script_path and custom_analytics_secret_key:
-        logger.info("Seeding analytics script")
+        logger.notice("Seeding analytics script")
         try:
             with open(seed_config.analytics_script_path, "r") as file:
                 script_content = file.read()
@@ -130,7 +130,7 @@ def get_seed_config() -> SeedConfiguration | None:
 def seed_db() -> None:
     seed_config = _parse_env()
     if seed_config is None:
-        logger.info("No seeding configuration file passed")
+        logger.debug("No seeding configuration file passed")
         return
 
     with get_session_context_manager() as db_session:
diff --git a/backend/model_server/custom_models.py b/backend/model_server/custom_models.py
index adddadd89..3c247336e 100644
--- a/backend/model_server/custom_models.py
+++ b/backend/model_server/custom_models.py
@@ -48,7 +48,7 @@ def get_local_intent_model(
         logger.warning(f"Failed to load model directly: {e}")
         try:
             # Attempt to download the model snapshot
-            logger.info(f"Downloading model snapshot for {model_name_or_path}")
+            logger.notice(f"Downloading model snapshot for {model_name_or_path}")
             local_path = snapshot_download(repo_id=model_name_or_path, revision=tag)
             _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
         except Exception as e:
@@ -60,7 +60,7 @@ def get_local_intent_model(
 
 
 def warm_up_intent_model() -> None:
-    logger.info(f"Warming up Intent Model: {INTENT_MODEL_VERSION}")
+    logger.notice(f"Warming up Intent Model: {INTENT_MODEL_VERSION}")
     intent_tokenizer = get_intent_model_tokenizer()
     tokens = intent_tokenizer(
         MODEL_WARM_UP_STRING, return_tensors="pt", truncation=True, padding=True
diff --git a/backend/model_server/encoders.py b/backend/model_server/encoders.py
index 623782857..51e17fa3d 100644
--- a/backend/model_server/encoders.py
+++ b/backend/model_server/encoders.py
@@ -208,7 +208,7 @@ def get_embedding_model(
         _GLOBAL_MODELS_DICT = {}
 
     if model_name not in _GLOBAL_MODELS_DICT:
-        logger.info(f"Loading {model_name}")
+        logger.notice(f"Loading {model_name}")
         # Some model architectures that aren't built into the Transformers or Sentence
         # Transformer need to be downloaded to be loaded locally. This does not mean
         # data is sent to remote servers for inference, however the remote code can
@@ -230,7 +230,7 @@ def get_local_reranking_model(
 ) -> CrossEncoder:
     global _RERANK_MODEL
     if _RERANK_MODEL is None:
-        logger.info(f"Loading {model_name}")
+        logger.notice(f"Loading {model_name}")
         model = CrossEncoder(model_name)
         _RERANK_MODEL = model
     return _RERANK_MODEL
diff --git a/backend/model_server/main.py b/backend/model_server/main.py
index d0fe95a75..c0851df6c 100644
--- a/backend/model_server/main.py
+++ b/backend/model_server/main.py
@@ -54,23 +54,23 @@ def _move_files_recursively(source: Path, dest: Path, overwrite: bool = False) -
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator:
     if torch.cuda.is_available():
-        logger.info("GPU is available")
+        logger.notice("GPU is available")
     else:
-        logger.info("GPU is not available")
+        logger.notice("GPU is not available")
 
     if TEMP_HF_CACHE_PATH.is_dir():
-        logger.info("Moving contents of temp_huggingface to huggingface cache.")
+        logger.notice("Moving contents of temp_huggingface to huggingface cache.")
         _move_files_recursively(TEMP_HF_CACHE_PATH, HF_CACHE_PATH)
         shutil.rmtree(TEMP_HF_CACHE_PATH, ignore_errors=True)
-        logger.info("Moved contents of temp_huggingface to huggingface cache.")
+        logger.notice("Moved contents of temp_huggingface to huggingface cache.")
 
     torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads()))
-    logger.info(f"Torch Threads: {torch.get_num_threads()}")
+    logger.notice(f"Torch Threads: {torch.get_num_threads()}")
 
     if not INDEXING_ONLY:
         warm_up_intent_model()
     else:
-        logger.info("This model server should only run document indexing.")
+        logger.notice("This model server should only run document indexing.")
 
     yield
 
@@ -91,8 +91,8 @@ app = get_model_app()
 
 
 if __name__ == "__main__":
-    logger.info(
+    logger.notice(
         f"Starting Danswer Model Server on http://{MODEL_SERVER_ALLOWED_HOST}:{str(MODEL_SERVER_PORT)}/"
     )
-    logger.info(f"Model Server Version: {__version__}")
+    logger.notice(f"Model Server Version: {__version__}")
     uvicorn.run(app, host=MODEL_SERVER_ALLOWED_HOST, port=MODEL_SERVER_PORT)
diff --git a/backend/model_server/utils.py b/backend/model_server/utils.py
index 3ebae26e5..0c2d6bac5 100644
--- a/backend/model_server/utils.py
+++ b/backend/model_server/utils.py
@@ -32,7 +32,7 @@ def simple_log_function_time(
             if debug_only:
                 logger.debug(final_log)
             else:
-                logger.info(final_log)
+                logger.notice(final_log)
 
             return result
 
diff --git a/backend/scripts/force_delete_connector_by_id.py b/backend/scripts/force_delete_connector_by_id.py
index 267b155af..118a4dfa4 100755
--- a/backend/scripts/force_delete_connector_by_id.py
+++ b/backend/scripts/force_delete_connector_by_id.py
@@ -119,7 +119,7 @@ def _unsafe_deletion(
     db_session.delete(connector)
     db_session.commit()
 
-    logger.info(
+    logger.notice(
         "Successfully deleted connector_credential_pair with connector_id:"
         f" '{connector_id}' and credential_id: '{credential_id}'. Deleted {num_docs_deleted} docs."
     )
@@ -133,10 +133,10 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
         Are you SURE you want to continue? (enter 'Y' to continue): "
     )
    if user_input != "Y":
-        logger.info(f"You entered {user_input}. Exiting!")
+        logger.notice(f"You entered {user_input}. Exiting!")
         return
 
-    logger.info("Getting connector credential pair")
+    logger.notice("Getting connector credential pair")
     cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session)
 
     if not cc_pair:
@@ -160,7 +160,7 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
         )
         return
 
-    logger.info("Cancelling indexing attempt for the connector")
+    logger.notice("Cancelling indexing attempt for the connector")
     cancel_indexing_attempts_for_ccpair(
         cc_pair_id=cc_pair_id, db_session=db_session, include_secondary_index=True
     )
@@ -183,7 +183,7 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
         else []
     )
     try:
-        logger.info("Deleting information from Vespa and Postgres")
+        logger.notice("Deleting information from Vespa and Postgres")
         curr_ind_name, sec_ind_name = get_both_index_names(db_session)
         document_index = get_default_document_index(
             primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
@@ -195,16 +195,16 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
             cc_pair=cc_pair,
             pair_id=cc_pair_id,
         )
-        logger.info(f"Deleted {files_deleted_count} files!")
+        logger.notice(f"Deleted {files_deleted_count} files!")
     except Exception as e:
         logger.error(f"Failed to delete connector due to {e}")
 
     if file_names:
-        logger.info("Deleting stored files!")
+        logger.notice("Deleting stored files!")
         file_store = get_default_file_store(db_session)
         for file_name in file_names:
-            logger.info(f"Deleting file {file_name}")
+            logger.notice(f"Deleting file {file_name}")
             file_store.delete_file(file_name)
 
diff --git a/backend/scripts/save_load_state.py b/backend/scripts/save_load_state.py
index 19d6fa66e..94431e8c8 100644
--- a/backend/scripts/save_load_state.py
+++ b/backend/scripts/save_load_state.py
@@ -21,7 +21,7 @@ logger = setup_logger()
 
 
 def save_postgres(filename: str, container_name: str) -> None:
-    logger.info("Attempting to take Postgres snapshot")
+    logger.notice("Attempting to take Postgres snapshot")
     cmd = f"docker exec {container_name} pg_dump -U {POSTGRES_USER} -h {POSTGRES_HOST} -p {POSTGRES_PORT} -W -F t {POSTGRES_DB}"
     with open(filename, "w") as file:
         subprocess.run(
@@ -35,7 +35,7 @@ def save_postgres(filename: str, container_name: str) -> None:
 
 
 def load_postgres(filename: str, container_name: str) -> None:
-    logger.info("Attempting to load Postgres snapshot")
+    logger.notice("Attempting to load Postgres snapshot")
     try:
         alembic_cfg = Config("alembic.ini")
         command.upgrade(alembic_cfg, "head")
@@ -57,7 +57,7 @@ def load_postgres(filename: str, container_name: str) -> None:
 
 
 def save_vespa(filename: str) -> None:
-    logger.info("Attempting to take Vespa snapshot")
+    logger.notice("Attempting to take Vespa snapshot")
     continuation = ""
     params = {}
     doc_jsons: list[dict] = []
diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py
index 3e69eae01..eb822f009 100644
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@@ -51,4 +51,4 @@ LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "danswer"
 # Enable generating persistent log files for local dev environments
 DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true"
 # notset, debug, info, notice, warning, error, or critical
-LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
+LOG_LEVEL = os.environ.get("LOG_LEVEL", "notice")
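
A note for reviewers on the "notice" level used throughout this patch: Python's standard logging module has no built-in NOTICE level, so logger.notice(...) only works because the patch registers one (that wiring lives in backend/danswer/utils/logger.py, whose hunk is not reproduced here). The snippet below is a minimal sketch of the common pattern for such a custom level, assuming NOTICE sits between INFO and WARNING; it is an illustration under that assumption, not a copy of the actual implementation, and the name NOTICE_LEVEL and the example messages are hypothetical.

    import logging

    # Assumed value: anything strictly between INFO (20) and WARNING (30).
    NOTICE_LEVEL = logging.INFO + 5
    logging.addLevelName(NOTICE_LEVEL, "NOTICE")

    def notice(self: logging.Logger, msg: str, *args: object, **kwargs: object) -> None:
        # Mirrors the built-in helpers (Logger.info, Logger.warning, ...).
        if self.isEnabledFor(NOTICE_LEVEL):
            self._log(NOTICE_LEVEL, msg, args, **kwargs)

    logging.Logger.notice = notice  # type: ignore[attr-defined]

    # With LOG_LEVEL="notice" (the new default in backend/shared_configs/configs.py):
    logging.basicConfig(level=NOTICE_LEVEL)
    logger = logging.getLogger("danswer")
    logger.notice("Starting Permission Syncing Loop")  # emitted at the new default
    logger.info("Document set 1 syncing now.")         # suppressed at the new default

Paired with the LOG_LEVEL default change above, the effect is that routine per-operation messages (left at info or demoted to debug) drop out of default output, while the startup, lifecycle, and destructive-operation messages promoted to notice in this patch stay visible.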