diff --git a/backend/danswer/connectors/web/connector.py b/backend/danswer/connectors/web/connector.py
index 140162a7b04b..6c66dc785606 100644
--- a/backend/danswer/connectors/web/connector.py
+++ b/backend/danswer/connectors/web/connector.py
@@ -29,6 +29,7 @@ from danswer.connectors.models import Section
 from danswer.file_processing.extract_file_text import pdf_to_text
 from danswer.file_processing.html_utils import web_html_cleanup
 from danswer.utils.logger import setup_logger
+from danswer.utils.sitemap import list_pages_for_site
 
 logger = setup_logger()
 
@@ -145,16 +146,21 @@ def extract_urls_from_sitemap(sitemap_url: str) -> list[str]:
     response.raise_for_status()
 
     soup = BeautifulSoup(response.content, "html.parser")
-    result = [
+    urls = [
         _ensure_absolute_url(sitemap_url, loc_tag.text)
         for loc_tag in soup.find_all("loc")
     ]
-    if not result:
+
+    if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
+        # The given URL doesn't look like a sitemap; try to discover one for the site
+        urls = list_pages_for_site(sitemap_url)
+
+    if len(urls) == 0:
         raise ValueError(
             f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
         )
 
-    return result
+    return urls
 
 
 def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str:
diff --git a/backend/danswer/utils/sitemap.py b/backend/danswer/utils/sitemap.py
new file mode 100644
index 000000000000..3b518688cdee
--- /dev/null
+++ b/backend/danswer/utils/sitemap.py
@@ -0,0 +1,41 @@
+from datetime import datetime
+from urllib import robotparser
+
+from usp.tree import sitemap_tree_for_homepage
+
+from danswer.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def test_url(rp: robotparser.RobotFileParser | None, url: str) -> bool:
+    # If robots.txt could not be loaded, assume the URL is allowed
+    if not rp:
+        return True
+    return rp.can_fetch("*", url)
+
+
+def init_robots_txt(site: str) -> robotparser.RobotFileParser:
+    # Add a timestamp query param to avoid a cached robots.txt
+    ts = datetime.now().timestamp()
+    robots_url = f"{site}/robots.txt?ts={ts}"
+    rp = robotparser.RobotFileParser()
+    rp.set_url(robots_url)
+    rp.read()
+    return rp
+
+
+def list_pages_for_site(site: str) -> list[str]:
+    rp = None
+    try:
+        rp = init_robots_txt(site)
+    except Exception:
+        logger.warning("Failed to load robots.txt")
+
+    tree = sitemap_tree_for_homepage(site)
+
+    # Keep only pages allowed by robots.txt, de-duplicated while preserving order
+    pages = [page.url for page in tree.all_pages() if test_url(rp, page.url)]
+    pages = list(dict.fromkeys(pages))
+
+    return pages
diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt
index 76f4387aeb9a..fc36432dfd0f 100644
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -72,4 +72,5 @@ zulip==0.8.2
 hubspot-api-client==8.1.0
 zenpy==2.0.41
 dropbox==11.36.2
-boto3-stubs[s3]==1.34.133
\ No newline at end of file
+boto3-stubs[s3]==1.34.133
+ultimate_sitemap_parser==0.5
diff --git a/web/src/app/admin/connectors/web/page.tsx b/web/src/app/admin/connectors/web/page.tsx
index 410d187920e7..06e2a000e347 100644
--- a/web/src/app/admin/connectors/web/page.tsx
+++ b/web/src/app/admin/connectors/web/page.tsx
@@ -98,7 +98,7 @@ export default function Web() {
             name: "Sitemap",
             value: "sitemap",
             description:
-              "Assumes the URL to Index points to a Sitemap. Will try and index all pages that are a mentioned in the sitemap.",
+              "Enter a sitemap URL, or the root URL of the site, and we will try to find a sitemap and index the pages it lists.",
           },
         ]}
       />
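---

For reference, a minimal usage sketch of the new helper introduced by this patch (not part of the patch itself; the example URL is hypothetical). It shows how sitemap mode can now start from a site's root URL and fall back to sitemap discovery via ultimate_sitemap_parser:

    from danswer.utils.sitemap import list_pages_for_site

    # Discovers sitemaps for the site (robots.txt references, /sitemap.xml, etc.)
    # via ultimate_sitemap_parser and returns de-duplicated page URLs,
    # filtered against robots.txt when it can be fetched.
    pages = list_pages_for_site("https://example.com")
    for url in pages:
        print(url)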