diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py
index e23ede24b..d10ab00f8 100644
--- a/backend/danswer/configs/model_configs.py
+++ b/backend/danswer/configs/model_configs.py
@@ -18,6 +18,9 @@ CROSS_ENCODER_MODEL_ENSEMBLE = [
     "cross-encoder/ms-marco-TinyBERT-L-2-v2",
 ]
 
+# Better to keep it loose; surfacing more results is better than missing results
+SEARCH_DISTANCE_CUTOFF = 0.1  # Cosine similarity (currently), range of -1 to 1 with -1 being completely opposite
+
 QUERY_MAX_CONTEXT_SIZE = 256
 # The below is correlated with CHUNK_SIZE in app_configs but not strictly calculated
 # To avoid extra overhead of tokenizing for chunking during indexing.
diff --git a/backend/danswer/datastores/qdrant/store.py b/backend/danswer/datastores/qdrant/store.py
index 4ff71626f..333e67906 100644
--- a/backend/danswer/datastores/qdrant/store.py
+++ b/backend/danswer/datastores/qdrant/store.py
@@ -7,6 +7,7 @@ from danswer.configs.app_configs import NUM_RETURNED_HITS
 from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION
 from danswer.configs.constants import ALLOWED_USERS
 from danswer.configs.constants import PUBLIC_DOC_PAT
+from danswer.configs.model_configs import SEARCH_DISTANCE_CUTOFF
 from danswer.datastores.datastore_utils import get_uuid_from_chunk
 from danswer.datastores.interfaces import IndexFilter
 from danswer.datastores.interfaces import VectorIndex
@@ -92,7 +93,8 @@ class QdrantIndex(VectorIndex):
         user_id: int | None,
         filters: list[IndexFilter] | None,
         num_to_retrieve: int = NUM_RETURNED_HITS,
-        page_size: int = NUM_RERANKED_RESULTS,
+        page_size: int = NUM_RETURNED_HITS,
+        distance_cutoff: float | None = SEARCH_DISTANCE_CUTOFF,
     ) -> list[InferenceChunk]:
         query_embedding = get_default_embedding_model().encode(
             query
@@ -113,6 +115,7 @@ class QdrantIndex(VectorIndex):
             query_filter=Filter(must=list(filter_conditions)),
             limit=page_size,
             offset=page_offset,
+            score_threshold=distance_cutoff,
         )
         page_offset += page_size
         if not hits:
diff --git a/backend/danswer/search/semantic_search.py b/backend/danswer/search/semantic_search.py
index 0e84982a5..bf074d17f 100644
--- a/backend/danswer/search/semantic_search.py
+++ b/backend/danswer/search/semantic_search.py
@@ -64,6 +64,8 @@ def retrieve_ranked_documents(
     num_hits: int = NUM_RETURNED_HITS,
     num_rerank: int = NUM_RERANKED_RESULTS,
 ) -> tuple[list[InferenceChunk] | None, list[InferenceChunk] | None]:
+    """Uses vector similarity to fetch the top num_hits document chunks, applying a distance cutoff.
+    Reranks only the top num_rerank of those (rather than all of them, to limit latency)."""
     top_chunks = datastore.semantic_retrieval(query, user_id, filters, num_hits)
     if not top_chunks:
         filters_log_msg = json.dumps(filters, separators=(",", ":")).replace("\n", "")
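
Note on the new score_threshold argument: it maps onto Qdrant's built-in score filter, which discards any hit whose similarity score falls below the given value before results are returned. A minimal sketch of the behavior, assuming a locally running Qdrant instance and a collection configured with cosine distance (the collection name and placeholder embedding below are hypothetical, not part of this PR):

    from qdrant_client import QdrantClient

    client = QdrantClient(host="localhost", port=6333)
    query_embedding = [0.1] * 768  # placeholder for a real query embedding

    # Hits scoring below score_threshold are dropped by Qdrant itself,
    # so a loose cutoff like 0.1 only filters out clearly unrelated chunks.
    hits = client.search(
        collection_name="danswer_index",  # hypothetical collection name
        query_vector=query_embedding,
        limit=50,
        score_threshold=0.1,  # cosine similarity in [-1, 1]; -1 is completely opposite
    )

With cosine similarity, scores near 1 indicate near-duplicate vectors and scores near 0 indicate roughly orthogonal (unrelated) ones, which is why the cutoff is kept loose at 0.1: it trims obvious noise without risking dropped results.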