diff --git a/backend/danswer/background/indexing/run_indexing.py b/backend/danswer/background/indexing/run_indexing.py
index b7359f19ce4c..1182dd08f499 100644
--- a/backend/danswer/background/indexing/run_indexing.py
+++ b/backend/danswer/background/indexing/run_indexing.py
@@ -7,6 +7,7 @@ from datetime import timezone
 from sqlalchemy.orm import Session
 
 from danswer.background.indexing.checkpointing import get_time_windows_for_index_attempt
+from danswer.configs.app_configs import INDEXING_SIZE_WARNING_THRESHOLD
 from danswer.configs.app_configs import POLL_CONNECTOR_OFFSET
 from danswer.connectors.factory import instantiate_connector
 from danswer.connectors.interfaces import GenerateDocumentsOutput
@@ -200,9 +201,22 @@ def _run_indexing(
                 # Likely due to user manually disabling it or model swap
                 raise RuntimeError("Index Attempt was canceled")
 
-            logger.debug(
-                f"Indexing batch of documents: {[doc.to_short_descriptor() for doc in doc_batch]}"
-            )
+            batch_description = []
+            for doc in doc_batch:
+                batch_description.append(doc.to_short_descriptor())
+
+                doc_size = 0
+                for section in doc.sections:
+                    doc_size += len(section.text)
+
+                if doc_size > INDEXING_SIZE_WARNING_THRESHOLD:
+                    logger.warning(
+                        f"Document size: doc='{doc.to_short_descriptor()}' "
+                        f"size={doc_size} "
+                        f"threshold={INDEXING_SIZE_WARNING_THRESHOLD}"
+                    )
+
+            logger.debug(f"Indexing batch of documents: {batch_description}")
 
             new_docs, total_batch_chunks = indexing_pipeline(
                 document_batch=doc_batch,
diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index adfb1c712316..50f7fc5ad54c 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -202,6 +202,11 @@
 CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING = (
     os.environ.get("CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING", "").lower() == "true"
 )
+# Attachments exceeding this size will not be retrieved (in bytes)
+CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD = int(
+    os.environ.get("CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD", 50 * 1024 * 1024)
+)
+
 JIRA_CONNECTOR_LABELS_TO_SKIP = [
     ignored_tag
     for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",")
@@ -277,6 +282,10 @@
 SKIP_METADATA_IN_CHUNK = os.environ.get("SKIP_METADATA_IN_CHUNK", "").lower() == "true"
 # Timeout to wait for job's last update before killing it, in hours
 CLEANUP_INDEXING_JOBS_TIMEOUT = int(os.environ.get("CLEANUP_INDEXING_JOBS_TIMEOUT", 3))
+# The indexer will warn in the logs whenever a document exceeds this threshold (in bytes)
+INDEXING_SIZE_WARNING_THRESHOLD = int(
+    os.environ.get("INDEXING_SIZE_WARNING_THRESHOLD", 100 * 1024 * 1024)
+)
 
 #####
 # Miscellaneous
diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py
index 30a9032c9767..3e366ead1c5c 100644
--- a/backend/danswer/connectors/confluence/connector.py
+++ b/backend/danswer/connectors/confluence/connector.py
@@ -13,6 +13,7 @@
 import bs4
 from atlassian import Confluence  # type:ignore
 from requests import HTTPError
+from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD
 from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES
 from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP
 from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING
@@ -560,6 +561,17 @@ class ConfluenceConnector(LoadConnector, PollConnector):
             if attachment["title"] not in files_in_used:
                 continue
 
+            download_link = confluence_client.url + attachment["_links"]["download"]
+
+            attachment_size = attachment["extensions"]["fileSize"]
+            if attachment_size > CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD:
+                logger.warning(
+                    f"Skipping {download_link} due to size. "
+                    f"size={attachment_size} "
+                    f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD}"
+                )
+                continue
+
             download_link = confluence_client.url + attachment["_links"]["download"]
             response = confluence_client._session.get(download_link)
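
Note on the indexing hunk (commentary and sketch, not part of the patch): the size check in _run_indexing only warns and never skips, so oversized documents are still indexed. Both knobs are environment-driven, with INDEXING_SIZE_WARNING_THRESHOLD defaulting to 100 MiB and CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD to 50 MiB. Below is a minimal, self-contained sketch of the per-document check; Document, Section, and describe_batch are simplified stand-ins for illustration, not danswer's real classes.

from dataclasses import dataclass

# Default mirrors the new app_configs value (100 MiB).
INDEXING_SIZE_WARNING_THRESHOLD = 100 * 1024 * 1024


@dataclass
class Section:
    text: str


@dataclass
class Document:
    id: str
    sections: list[Section]

    def to_short_descriptor(self) -> str:
        # Simplified; the real descriptor includes more identifying fields.
        return f"ID: '{self.id}'"


def describe_batch(doc_batch: list[Document]) -> list[str]:
    """Collect short descriptors, warning on any oversized document."""
    batch_description = []
    for doc in doc_batch:
        batch_description.append(doc.to_short_descriptor())
        # Total character count across all sections, mirroring the
        # sum of len(section.text) in the _run_indexing hunk.
        doc_size = sum(len(section.text) for section in doc.sections)
        if doc_size > INDEXING_SIZE_WARNING_THRESHOLD:
            print(
                f"Document size: doc='{doc.to_short_descriptor()}' "
                f"size={doc_size} threshold={INDEXING_SIZE_WARNING_THRESHOLD}"
            )
    return batch_description

One caveat: len(section.text) counts characters, not bytes, so the comparison against a byte-denominated threshold is only approximate for non-ASCII content; since this path merely logs, the imprecision is harmless.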
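
The Confluence hunk, by contrast, hard-skips: any attachment whose reported extensions.fileSize exceeds the threshold is dropped before a download request is issued. A standalone sketch of that gate follows, assuming the metadata dict shape the diff relies on; the sample attachment values are hypothetical.

# Default mirrors the new app_configs value (50 MiB).
CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD = 50 * 1024 * 1024


def should_skip_attachment(attachment: dict, base_url: str) -> bool:
    """Return True if the attachment is too large to download."""
    download_link = base_url + attachment["_links"]["download"]
    attachment_size = attachment["extensions"]["fileSize"]
    if attachment_size > CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD:
        # Mirrors the connector's logger.warning; no HTTP request is made.
        print(
            f"Skipping {download_link} due to size. "
            f"size={attachment_size} "
            f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD}"
        )
        return True
    return False


# A 60 MiB attachment exceeds the 50 MiB default and is skipped.
attachment = {
    "title": "big-export.pdf",
    "_links": {"download": "/download/attachments/123/big-export.pdf"},
    "extensions": {"fileSize": 60 * 1024 * 1024},
}
assert should_skip_attachment(attachment, "https://wiki.example.com")

Also worth flagging in review: the added block computes download_link, and the pre-existing assignment just below it is left in place, so the value is computed twice on the non-skip path. Redundant, though harmless.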