diff --git a/backend/Dockerfile.model_server b/backend/Dockerfile.model_server
index da62eddf3..451bf5cfc 100644
--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -32,10 +32,13 @@
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3'); \
+snapshot_download(repo_id='sentence-transformers/paraphrase-mpnet-base-v2'); \
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from sentence_transformers import SentenceTransformer; \
-SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
+SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True); \
+from setfit import SetFitModel; \
+SetFitModel.from_pretrained('sentence-transformers/paraphrase-mpnet-base-v2');"

# In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while
# running Onyx, don't overwrite it with the built in cache folder
diff --git a/backend/model_server/constants.py b/backend/model_server/constants.py
index d026d4a76..fdab6018d 100644
--- a/backend/model_server/constants.py
+++ b/backend/model_server/constants.py
@@ -3,6 +3,7 @@ from shared_configs.enums import EmbedTextType

MODEL_WARM_UP_STRING = "hi " * 512
+CONTENT_MODEL_WARM_UP_STRING = "hi " * 16
DEFAULT_OPENAI_MODEL = "text-embedding-3-small"
DEFAULT_COHERE_MODEL = "embed-english-light-v3.0"
DEFAULT_VOYAGE_MODEL = "voyage-large-2-instruct"
diff --git a/backend/model_server/custom_models.py b/backend/model_server/custom_models.py
index db8ba5d0c..2b614f81f 100644
--- a/backend/model_server/custom_models.py
+++ b/backend/model_server/custom_models.py
@@ -2,10 +2,12 @@ import torch
import torch.nn.functional as F
from fastapi import APIRouter
from huggingface_hub import snapshot_download  # type: ignore
+from setfit import SetFitModel  # type: ignore[import]
from transformers import AutoTokenizer  # type: ignore
from transformers import BatchEncoding  # type: ignore
from transformers import PreTrainedTokenizer  # type: ignore

+from model_server.constants import CONTENT_MODEL_WARM_UP_STRING
from model_server.constants import MODEL_WARM_UP_STRING
from model_server.onyx_torch_model import ConnectorClassifier
from model_server.onyx_torch_model import HybridClassifier
@@ -13,6 +15,7 @@ from model_server.utils import simple_log_function_time
from onyx.utils.logger import setup_logger
from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO
from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG
+from shared_configs.configs import CONTENT_MODEL_VERSION
from shared_configs.configs import INDEXING_ONLY
from shared_configs.configs import INTENT_MODEL_TAG
from shared_configs.configs import INTENT_MODEL_VERSION
@@ -21,6 +24,7 @@ from shared_configs.model_server_models import ConnectorClassificationResponse
from shared_configs.model_server_models import IntentRequest
from shared_configs.model_server_models import IntentResponse
+

logger = setup_logger()

router = APIRouter(prefix="/custom")
@@ -31,6 +35,10 @@
_CONNECTOR_CLASSIFIER_MODEL: ConnectorClassifier | None = None

_INTENT_TOKENIZER: AutoTokenizer | None = None
_INTENT_MODEL: HybridClassifier | None = None

+_CONTENT_MODEL: SetFitModel | None = None
+
+_TEMPERATURE_CONTENT_CLASSIFICATION = 4.0
+

def get_connector_classifier_tokenizer() -> AutoTokenizer:
    global _CONNECTOR_CLASSIFIER_TOKENIZER
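
For reference, `_TEMPERATURE_CONTENT_CLASSIFICATION = 4.0` is the temperature for the (currently commented-out) probability calibration in `run_content_classification_inference` below. A minimal sketch of that scaling, assuming `p` is the raw positive-class probability from `SetFitModel.predict_proba`:

```python
import math

def scale_probability(p: float, temperature: float = 4.0) -> float:
    """Temperature-scale a probability: logit, divide by T, sigmoid back."""
    logit = math.log(p / (1 - p))  # inverse sigmoid
    scaled = logit / temperature   # T > 1 softens overconfident scores
    return math.exp(scaled) / (1 + math.exp(scaled))

# e.g. scale_probability(0.98) ~= 0.73 - confident predictions are pulled toward 0.5
```
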
@@ -112,6 +120,13 @@ def get_local_intent_model(
    return _INTENT_MODEL


+def get_local_content_model() -> SetFitModel:
+    global _CONTENT_MODEL
+    if _CONTENT_MODEL is None:
+        _CONTENT_MODEL = SetFitModel.from_pretrained(CONTENT_MODEL_VERSION)
+    return _CONTENT_MODEL
+
+
def tokenize_connector_classification_query(
    connectors: list[str],
    query: str,
@@ -195,6 +210,16 @@ def warm_up_intent_model() -> None:
    )


+def warm_up_content_model() -> None:
+    logger.notice(
+        "Warming up Content Model"
+    )  # TODO: add version once we have a properly fine-tuned model
+
+    content_model = get_local_content_model()
+    content_model(CONTENT_MODEL_WARM_UP_STRING)
+
+
@simple_log_function_time()
def run_inference(tokens: BatchEncoding) -> tuple[list[float], list[float]]:
    intent_model = get_local_intent_model()
@@ -218,6 +243,29 @@ def run_inference(tokens: BatchEncoding) -> tuple[list[float], list[float]]:
    return intent_probabilities.tolist(), token_positive_probs


+@simple_log_function_time()
+def run_content_classification_inference(
+    text_inputs: list[str],
+) -> list[tuple[int, float]]:
+    # Load the model now so the endpoint fails fast if it is unavailable; real
+    # inference stays stubbed out until the fine-tuned model ships.
+    content_model = get_local_content_model()
+
+    # Intended implementation (temperature-scaled SetFit probabilities):
+    # output_classes = [x.numpy() for x in content_model(text_inputs)]
+    # base_output_probabilities = [x[1].numpy() for x in content_model.predict_proba(text_inputs)]
+    # logits = [np.log(p / (1 - p)) for p in base_output_probabilities]
+    # scaled_logits = [l / _TEMPERATURE_CONTENT_CLASSIFICATION for l in logits]
+    # output_probabilities_with_temp = [np.exp(l) / (1 + np.exp(l)) for l in scaled_logits]
+
+    # Placeholder outputs: label 1 ("has content") with probability 0.9 for every input
+    output_classes = [1] * len(text_inputs)
+    output_probabilities_with_temp = [0.9] * len(text_inputs)
+
+    return list(zip(output_classes, output_probabilities_with_temp))
+
+
def map_keywords(
    input_ids: torch.Tensor, tokenizer: AutoTokenizer, is_keyword: list[bool]
) -> list[str]:
@@ -362,3 +410,13 @@ async def process_analysis_request(
    is_keyword, keywords = run_analysis(intent_request)

    return IntentResponse(is_keyword=is_keyword, keywords=keywords)
+
+
+@router.post("/content-classification")
+async def process_content_classification_request(
+    content_classification_requests: list[str],
+) -> list[tuple[int, float]]:
+    content_classification_result = run_content_classification_inference(
+        content_classification_requests
+    )
+    return content_classification_result
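
For reference, a minimal client call against the new endpoint (host and port are hypothetical; the request body is a bare JSON list of strings, mirroring `ContentClassificationModel.predict` further down):

```python
import requests

# Hypothetical local model server; adjust host/port to your deployment.
resp = requests.post(
    "http://localhost:9000/custom/content-classification",
    json=["hi", "The quarterly revenue grew 12% year over year."],
)
resp.raise_for_status()
# Each entry is [predicted_label, predicted_probability], e.g. [[1, 0.9], [1, 0.9]]
# while the placeholder implementation is in place.
print(resp.json())
```
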
diff --git a/backend/onyx/background/indexing/run_indexing.py b/backend/onyx/background/indexing/run_indexing.py
index 612cacc3e..e463d3102 100644
--- a/backend/onyx/background/indexing/run_indexing.py
+++ b/backend/onyx/background/indexing/run_indexing.py
@@ -53,6 +53,11 @@ from onyx.httpx.httpx_pool import HttpxPool
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.indexing_pipeline import build_indexing_pipeline
+from onyx.natural_language_processing.search_nlp_models import (
+    ContentClassificationModel,
+)
from onyx.utils.logger import setup_logger
from onyx.utils.logger import TaskAttemptSingleton
from onyx.utils.telemetry import create_milestone_and_report
+from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
+from shared_configs.configs import INDEXING_MODEL_SERVER_PORT
@@ -348,6 +351,12 @@ def _run_indexing(
        callback=callback,
    )

+    # Use the shared model server configs rather than hardcoding localhost:9000
+    content_classification_model = ContentClassificationModel(
+        model_server_host=INDEXING_MODEL_SERVER_HOST,
+        model_server_port=INDEXING_MODEL_SERVER_PORT,
+    )
+
    document_index = get_default_document_index(
        index_attempt_start.search_settings,
        None,
@@ -356,6 +363,7 @@
    indexing_pipeline = build_indexing_pipeline(
        embedder=embedding_model,
+        content_classification_model=content_classification_model,
        document_index=document_index,
        ignore_time_skip=(
            ctx.from_beginning
diff --git a/backend/onyx/document_index/interfaces.py b/backend/onyx/document_index/interfaces.py
index 463abbc95..152b420e2 100644
--- a/backend/onyx/document_index/interfaces.py
+++ b/backend/onyx/document_index/interfaces.py
@@ -101,6 +101,7 @@ class VespaDocumentFields:
    document_sets: set[str] | None = None
    boost: float | None = None
    hidden: bool | None = None
+    aggregated_boost_factor: float | None = None


@dataclass
diff --git a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
index d5d5220f8..aacadb951 100644
--- a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
+++ b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
@@ -80,6 +80,11 @@ schema DANSWER_CHUNK_NAME {
        indexing: summary | attribute
        rank: filter
    }
+    # Aggregated boost factor, currently used to down-rank short, low-content chunks
+    field aggregated_boost_factor type float {
+        indexing: attribute
+    }
+
    # Needs to have a separate Attribute list for efficient filtering
    field metadata_list type array<string> {
        indexing: summary | attribute
@@ -142,6 +147,11 @@
        expression: max(if(isNan(attribute(doc_updated_at)) == 1, 7890000, now() - attribute(doc_updated_at)) / 31536000, 0)
    }

+    function inline document_aggregated_boost_factor() {
+        # Defaults to a neutral 1.0 if no aggregated boost factor was indexed
+        expression: if(isNan(attribute(aggregated_boost_factor)) == 1, 1.0, attribute(aggregated_boost_factor))
+    }
+
    # Document score decays from 1 to 0.75 as age of last updated time increases
    function inline recency_bias() {
        expression: max(1 / (1 + query(decay_factor) * document_age), 0.75)
@@ -199,6 +209,8 @@
                * document_boost
                # Decay factor based on time document was last updated
                * recency_bias
+                # Boost based on aggregated boost calculation
+                * document_aggregated_boost_factor
        }
        rerank-count: 1000
    }
@@ -210,6 +222,7 @@
            closeness(field, embeddings)
            document_boost
            recency_bias
+            document_aggregated_boost_factor
            closest(embeddings)
        }
    }
diff --git a/backend/onyx/document_index/vespa/indexing_utils.py b/backend/onyx/document_index/vespa/indexing_utils.py
index 81fc2a0d4..feb7580d6 100644
--- a/backend/onyx/document_index/vespa/indexing_utils.py
+++ b/backend/onyx/document_index/vespa/indexing_utils.py
@@ -22,6 +22,7 @@ from onyx.document_index.vespa.shared_utils.utils import (
    replace_invalid_doc_id_characters,
)
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
+from onyx.document_index.vespa_constants import AGGREGATED_BOOST_FACTOR
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CHUNK_ID
@@ -201,6 +202,7 @@ def _index_vespa_chunk(
        DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets},
        IMAGE_FILE_NAME: chunk.image_file_name,
        BOOST: chunk.boost,
+        AGGREGATED_BOOST_FACTOR: chunk.aggregated_boost_factor,
    }

    if multitenant:
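
To make the ranking change concrete, a rough Python sketch of how the first-phase score now composes (illustrative only; the real computation happens inside Vespa's rank profile above, and `semantic_score` / `decay_factor` here are stand-ins):

```python
def first_phase_score(
    semantic_score: float,                 # e.g. closeness(field, embeddings)
    document_boost: float,                 # user-driven boost from up/down votes
    doc_age_years: float,                  # derived from doc_updated_at
    aggregated_boost_factor: float = 1.0,  # new content-based factor; NaN -> 1.0
    decay_factor: float = 0.5,
) -> float:
    recency_bias = max(1 / (1 + decay_factor * doc_age_years), 0.75)
    return semantic_score * document_boost * recency_bias * aggregated_boost_factor

# A short, low-content chunk (factor 0.6) ranks below an otherwise-equal chunk:
print(first_phase_score(0.9, 1.0, 0.1, 0.6))  # ~0.514
print(first_phase_score(0.9, 1.0, 0.1, 1.0))  # ~0.857
```
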
diff --git a/backend/onyx/document_index/vespa_constants.py b/backend/onyx/document_index/vespa_constants.py
index 15f889f3c..f29c48fe4 100644
--- a/backend/onyx/document_index/vespa_constants.py
+++ b/backend/onyx/document_index/vespa_constants.py
@@ -72,6 +72,7 @@ METADATA = "metadata"
METADATA_LIST = "metadata_list"
METADATA_SUFFIX = "metadata_suffix"
BOOST = "boost"
+AGGREGATED_BOOST_FACTOR = "aggregated_boost_factor"
DOC_UPDATED_AT = "doc_updated_at"  # Indexed as seconds since epoch
PRIMARY_OWNERS = "primary_owners"
SECONDARY_OWNERS = "secondary_owners"
@@ -97,6 +98,7 @@ YQL_BASE = (
    f"{SECTION_CONTINUATION}, "
    f"{IMAGE_FILE_NAME}, "
    f"{BOOST}, "
+    f"{AGGREGATED_BOOST_FACTOR}, "
    f"{HIDDEN}, "
    f"{DOC_UPDATED_AT}, "
    f"{PRIMARY_OWNERS}, "
diff --git a/backend/onyx/indexing/content_classification.py b/backend/onyx/indexing/content_classification.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/onyx/indexing/indexing_pipeline.py b/backend/onyx/indexing/indexing_pipeline.py
index f4a6e0075..3c1b6b303 100644
--- a/backend/onyx/indexing/indexing_pipeline.py
+++ b/backend/onyx/indexing/indexing_pipeline.py
@@ -52,7 +52,11 @@ from onyx.indexing.embedder import IndexingEmbedder
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
+from onyx.indexing.models import IndexChunk
from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
from onyx.llm.factory import get_default_llm_with_vision
+from onyx.natural_language_processing.search_nlp_models import (
+    ContentClassificationModel,
+)
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
@@ -136,6 +140,81 @@ def _upsert_documents_in_db(
    )


+def _get_aggregated_boost_factor(
+    chunks: list[IndexChunk], content_classification_model: ContentClassificationModel
+) -> tuple[list[IndexChunk], list[float], list[ConnectorFailure]]:
+    """Calculates the aggregated boost factor for each chunk based on its content.
+
+    Chunks longer than 10 words keep a neutral factor of 1.0; shorter chunks are
+    scored by the content classification model. Returns the chunks, their scores,
+    and any classification failures."""
+
+    short_chunk_content_dict = {
+        chunk_num: chunk.content
+        for chunk_num, chunk in enumerate(chunks)
+        if len(chunk.content.split()) <= 10
+    }
+    short_chunk_contents = list(short_chunk_content_dict.values())
+    short_chunk_keys = list(short_chunk_content_dict.keys())
+
+    try:
+        short_content_classification_predictions = content_classification_model.predict(
+            short_chunk_contents
+        )
+        short_content_classification_results = [
+            raw_score for _, raw_score in short_content_classification_predictions
+        ]
+        short_content_classification_results_dict = {
+            short_chunk_keys[i]: short_content_classification_results[i]
+            for i in range(len(short_chunk_keys))
+        }
+        chunk_content_scores = [
+            1.0
+            if chunk_num not in short_content_classification_results_dict
+            else short_content_classification_results_dict[chunk_num]
+            for chunk_num in range(len(chunks))
+        ]
+
+        return chunks, chunk_content_scores, []
+
+    except Exception as e:
+        logger.exception(
+            f"Error predicting content classification for chunks: {e}. "
+            "Falling back to individual examples."
+        )
+
+    chunks_with_scores: list[IndexChunk] = []
+    chunk_content_scores = []
+    failures: list[ConnectorFailure] = []
+
+    for chunk in chunks:
+        if len(chunk.content.split()) <= 10:
+            try:
+                chunk_content_scores.append(
+                    content_classification_model.predict([chunk.content])[0][1]
+                )
+                chunks_with_scores.append(chunk)
+            except Exception as e:
+                logger.exception(
+                    f"Error predicting content classification for chunk: {e}. "
+                    "Adding to missed content classifications."
+                )
+                failures.append(
+                    ConnectorFailure(
+                        failed_document=DocumentFailure(
+                            document_id=chunk.source_document.id,
+                            document_link=(
+                                chunk.source_document.sections[0].link
+                                if chunk.source_document.sections
+                                else None
+                            ),
+                        ),
+                        failure_message=str(e),
+                        exception=e,
+                    )
+                )
+        else:
+            chunk_content_scores.append(1.0)
+            chunks_with_scores.append(chunk)
+
+    return chunks_with_scores, chunk_content_scores, failures
+
+
def get_doc_ids_to_update(
    documents: list[Document], db_docs: list[DBDocument]
) -> list[Document]:
@@ -165,6 +244,7 @@ def index_doc_batch_with_handler(
    *,
    chunker: Chunker,
    embedder: IndexingEmbedder,
+    content_classification_model: ContentClassificationModel,
    document_index: DocumentIndex,
    document_batch: list[Document],
    index_attempt_metadata: IndexAttemptMetadata,
@@ -176,6 +256,7 @@
    index_pipeline_result = index_doc_batch(
        chunker=chunker,
        embedder=embedder,
+        content_classification_model=content_classification_model,
        document_index=document_index,
        document_batch=document_batch,
        index_attempt_metadata=index_attempt_metadata,
@@ -450,6 +531,7 @@ def index_doc_batch(
    document_batch: list[Document],
    chunker: Chunker,
    embedder: IndexingEmbedder,
+    content_classification_model: ContentClassificationModel,
    document_index: DocumentIndex,
    index_attempt_metadata: IndexAttemptMetadata,
    db_session: Session,
@@ -526,6 +608,14 @@
        else ([], [])
    )

+    (
+        chunks_with_embeddings_scores,
+        chunk_content_scores,
+        chunk_content_classification_failures,
+    ) = _get_aggregated_boost_factor(
+        chunks_with_embeddings, content_classification_model
+    )
+
    updatable_ids = [doc.id for doc in ctx.updatable_docs]

    # Acquires a lock on the documents so that no other process can modify them
@@ -554,7 +644,7 @@
        document_id: len(
            [
                chunk
-                for chunk in chunks_with_embeddings
+                for chunk in chunks_with_embeddings_scores
                if chunk.source_document.id == document_id
            ]
        )
@@ -579,8 +669,9 @@
                else DEFAULT_BOOST
            ),
            tenant_id=tenant_id,
+            aggregated_boost_factor=chunk_content_scores[chunk_num],
        )
-        for chunk in chunks_with_embeddings
+        for chunk_num, chunk in enumerate(chunks_with_embeddings_scores)
    ]

    logger.debug(
@@ -671,7 +762,9 @@
        new_docs=len([r for r in insertion_records if r.already_existed is False]),
        total_docs=len(filtered_documents),
        total_chunks=len(access_aware_chunks),
-        failures=vector_db_write_failures + embedding_failures,
+        failures=vector_db_write_failures
+        + embedding_failures
+        + chunk_content_classification_failures,
    )

    return result
@@ -680,6 +773,7 @@ def build_indexing_pipeline(
    *,
    embedder: IndexingEmbedder,
+    content_classification_model: ContentClassificationModel,
    document_index: DocumentIndex,
    db_session: Session,
    tenant_id: str,
@@ -703,6 +797,7 @@
        index_doc_batch_with_handler,
        chunker=chunker,
        embedder=embedder,
+        content_classification_model=content_classification_model,
        document_index=document_index,
        ignore_time_skip=ignore_time_skip,
        db_session=db_session,
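
A quick illustration of the happy path in `_get_aggregated_boost_factor` (hypothetical values; the fake model stands in for `ContentClassificationModel` and plain strings stand in for `IndexChunk.content`):

```python
# Long chunks keep a neutral 1.0; short chunks (<= 10 words) get the model's score.
class FakeContentModel:
    def predict(self, texts: list[str]) -> list[tuple[int, float]]:
        return [(1, 0.42) for _ in texts]  # pretend every short chunk scores 0.42

chunks = ["word " * 50, "tiny chunk"]  # one long chunk, one short chunk
scores = [
    1.0 if len(c.split()) > 10 else FakeContentModel().predict([c])[0][1]
    for c in chunks
]
print(scores)  # [1.0, 0.42]
```
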
diff --git a/backend/onyx/indexing/models.py b/backend/onyx/indexing/models.py
index 5dffe1b08..d777993eb 100644
--- a/backend/onyx/indexing/models.py
+++ b/backend/onyx/indexing/models.py
@@ -83,13 +83,16 @@ class DocMetadataAwareIndexChunk(IndexChunk):
    document_sets: all document sets the source document for this chunk is a part
                   of. This is used for filtering / personas.
    boost: influences the ranking of this chunk at query time. Positive -> ranked higher,
-           negative -> ranked lower.
+           negative -> ranked lower. Not included in the aggregated boost calculation
+           for legacy reasons.
+    aggregated_boost_factor: a non-user-specific, content-derived boost factor
+           applied at ranking time.
    """

    tenant_id: str
    access: "DocumentAccess"
    document_sets: set[str]
    boost: int
+    aggregated_boost_factor: float = 1.0

    @classmethod
    def from_index_chunk(
@@ -98,6 +101,7 @@
        access: "DocumentAccess",
        document_sets: set[str],
        boost: int,
+        aggregated_boost_factor: float,
        tenant_id: str,
    ) -> "DocMetadataAwareIndexChunk":
        index_chunk_data = index_chunk.model_dump()
@@ -106,6 +110,7 @@
            access=access,
            document_sets=document_sets,
            boost=boost,
+            aggregated_boost_factor=aggregated_boost_factor,
            tenant_id=tenant_id,
        )
diff --git a/backend/onyx/natural_language_processing/search_nlp_models.py b/backend/onyx/natural_language_processing/search_nlp_models.py
index 3a7fcdf6f..faeef3437 100644
--- a/backend/onyx/natural_language_processing/search_nlp_models.py
+++ b/backend/onyx/natural_language_processing/search_nlp_models.py
@@ -36,6 +36,7 @@ from shared_configs.enums import EmbedTextType
from shared_configs.enums import RerankerProvider
from shared_configs.model_server_models import ConnectorClassificationRequest
from shared_configs.model_server_models import ConnectorClassificationResponse
+from shared_configs.model_server_models import ContentClassificationResponses
from shared_configs.model_server_models import Embedding
from shared_configs.model_server_models import EmbedRequest
from shared_configs.model_server_models import EmbedResponse
@@ -377,6 +378,31 @@ class QueryAnalysisModel:
        return response_model.is_keyword, response_model.keywords


+class ContentClassificationModel:
+    def __init__(
+        self,
+        model_server_host: str = MODEL_SERVER_HOST,
+        model_server_port: int = MODEL_SERVER_PORT,
+    ) -> None:
+        model_server_url = build_model_server_url(model_server_host, model_server_port)
+        self.content_server_endpoint = (
+            model_server_url + "/custom/content-classification"
+        )
+
+    def predict(
+        self,
+        queries: list[str],
+    ) -> list[tuple[int, float]]:
+        response = requests.post(self.content_server_endpoint, json=queries)
+        response.raise_for_status()
+
+        response_model = ContentClassificationResponses(
+            content_classifications=response.json()
+        )
+
+        return response_model.content_classifications
+
+
class ConnectorClassificationModel:
    def __init__(
        self,
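
Usage from the pipeline side looks roughly like this (assumes the Onyx backend on PYTHONPATH and a reachable model server at the configured host/port):

```python
from onyx.natural_language_processing.search_nlp_models import (
    ContentClassificationModel,
)

model = ContentClassificationModel()  # defaults to MODEL_SERVER_HOST:MODEL_SERVER_PORT
predictions = model.predict(["hi", "Quarterly revenue grew 12% year over year."])
for label, probability in predictions:
    print(label, probability)  # e.g. "1 0.9" while the server-side stub is in place
```
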
diff --git a/backend/onyx/server/onyx_api/ingestion.py b/backend/onyx/server/onyx_api/ingestion.py
index ec4eeac8d..25e9758c4 100644
--- a/backend/onyx/server/onyx_api/ingestion.py
+++ b/backend/onyx/server/onyx_api/ingestion.py
@@ -19,10 +19,15 @@ from onyx.db.search_settings import get_secondary_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import build_indexing_pipeline
+from onyx.natural_language_processing.search_nlp_models import (
+    ContentClassificationModel,
+)
from onyx.server.onyx_api.models import DocMinimalInfo
from onyx.server.onyx_api.models import IngestionDocument
from onyx.server.onyx_api.models import IngestionResult
from onyx.utils.logger import setup_logger
+from shared_configs.configs import MODEL_SERVER_HOST
+from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.contextvars import get_current_tenant_id

logger = setup_logger()
@@ -102,8 +107,13 @@ def upsert_ingestion_doc(
        search_settings=search_settings
    )

+    content_classification_model = ContentClassificationModel(
+        model_server_host=MODEL_SERVER_HOST, model_server_port=MODEL_SERVER_PORT
+    )
+
    indexing_pipeline = build_indexing_pipeline(
        embedder=index_embedding_model,
+        content_classification_model=content_classification_model,
        document_index=curr_doc_index,
        ignore_time_skip=True,
        db_session=db_session,
@@ -138,6 +148,7 @@
    sec_ind_pipeline = build_indexing_pipeline(
        embedder=new_index_embedding_model,
+        content_classification_model=content_classification_model,
        document_index=sec_doc_index,
        ignore_time_skip=True,
        db_session=db_session,
diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt
index 016d14c23..3da78ef25 100644
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -25,7 +25,7 @@ google-auth-oauthlib==1.0.0
httpcore==1.0.5
httpx[http2]==0.27.0
httpx-oauth==0.15.1
-huggingface-hub==0.20.1
+huggingface-hub==0.29.0
inflection==0.5.1
jira==3.5.1
jsonref==1.1.0
@@ -71,6 +71,7 @@ requests==2.32.2
requests-oauthlib==1.3.1
retry==0.9.2  # This pulls in py which is in CVE-2022-42969, must remove py from image
rfc3986==1.5.0
+setfit==1.1.1
simple-salesforce==1.12.6
slack-sdk==3.20.2
SQLAlchemy[mypy]==2.0.15
@@ -78,7 +79,7 @@ starlette==0.36.3
supervisor==4.2.5
tiktoken==0.7.0
timeago==1.0.16
-transformers==4.39.2
+transformers==4.49.0
unstructured==0.15.1
unstructured-client==0.25.4
uvicorn==0.21.1
diff --git a/backend/requirements/model_server.txt b/backend/requirements/model_server.txt
index b4d4a9f06..e9d7ddd2e 100644
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -8,8 +8,9 @@ pydantic==2.8.2
retry==0.9.2
safetensors==0.4.2
sentence-transformers==2.6.1
+setfit==1.1.1
torch==2.2.0
-transformers==4.39.2
+transformers==4.49.0
uvicorn==0.21.1
voyageai==0.2.3
litellm==1.61.16
diff --git a/backend/scripts/document_seeding_prep.py b/backend/scripts/document_seeding_prep.py
index 4b643ef4e..c4d9637cb 100644
--- a/backend/scripts/document_seeding_prep.py
+++ b/backend/scripts/document_seeding_prep.py
@@ -161,17 +161,21 @@
overview_doc = SeedPresaveDocument(
    url="https://docs.onyx.app/more/use_cases/overview",
    title=overview_title,
    content=overview,
-    title_embedding=model.encode(f"search_document: {overview_title}"),
-    content_embedding=model.encode(f"search_document: {overview_title}\n{overview}"),
+    title_embedding=list(model.encode(f"search_document: {overview_title}")),
+    content_embedding=list(
+        model.encode(f"search_document: {overview_title}\n{overview}")
+    ),
)

enterprise_search_doc = SeedPresaveDocument(
    url="https://docs.onyx.app/more/use_cases/enterprise_search",
    title=enterprise_search_title,
    content=enterprise_search_1,
-    title_embedding=model.encode(f"search_document: {enterprise_search_title}"),
-    content_embedding=model.encode(
-        f"search_document: {enterprise_search_title}\n{enterprise_search_1}"
+    title_embedding=list(model.encode(f"search_document: {enterprise_search_title}")),
+    content_embedding=list(
+        model.encode(
+            f"search_document: {enterprise_search_title}\n{enterprise_search_1}"
+        )
    ),
)

@@ -179,9 +183,11 @@
enterprise_search_doc_2 = SeedPresaveDocument(
    url="https://docs.onyx.app/more/use_cases/enterprise_search",
    title=enterprise_search_title,
    content=enterprise_search_2,
-    title_embedding=model.encode(f"search_document: {enterprise_search_title}"),
-    content_embedding=model.encode(
-        f"search_document: {enterprise_search_title}\n{enterprise_search_2}"
+    title_embedding=list(model.encode(f"search_document: {enterprise_search_title}")),
+    content_embedding=list(
+        model.encode(
+            f"search_document: {enterprise_search_title}\n{enterprise_search_2}"
+        )
    ),
    chunk_ind=1,
)

@@ -190,9 +196,9 @@
ai_platform_doc = SeedPresaveDocument(
    url="https://docs.onyx.app/more/use_cases/ai_platform",
    title=ai_platform_title,
    content=ai_platform,
-    title_embedding=model.encode(f"search_document: {ai_platform_title}"),
-    content_embedding=model.encode(
-        f"search_document: {ai_platform_title}\n{ai_platform}"
+    title_embedding=list(model.encode(f"search_document: {ai_platform_title}")),
+    content_embedding=list(
+        model.encode(f"search_document: {ai_platform_title}\n{ai_platform}")
    ),
)

@@ -200,9 +206,9 @@
customer_support_doc = SeedPresaveDocument(
    url="https://docs.onyx.app/more/use_cases/support",
    title=customer_support_title,
    content=customer_support,
-    title_embedding=model.encode(f"search_document: {customer_support_title}"),
-    content_embedding=model.encode(
-        f"search_document: {customer_support_title}\n{customer_support}"
+    title_embedding=list(model.encode(f"search_document: {customer_support_title}")),
+    content_embedding=list(
+        model.encode(f"search_document: {customer_support_title}\n{customer_support}")
    ),
)

@@ -210,17 +216,17 @@
sales_doc = SeedPresaveDocument(
    url="https://docs.onyx.app/more/use_cases/sales",
    title=sales_title,
    content=sales,
-    title_embedding=model.encode(f"search_document: {sales_title}"),
-    content_embedding=model.encode(f"search_document: {sales_title}\n{sales}"),
+    title_embedding=list(model.encode(f"search_document: {sales_title}")),
+    content_embedding=list(model.encode(f"search_document: {sales_title}\n{sales}")),
)

operations_doc = SeedPresaveDocument(
    url="https://docs.onyx.app/more/use_cases/operations",
    title=operations_title,
    content=operations,
-    title_embedding=model.encode(f"search_document: {operations_title}"),
-    content_embedding=model.encode(
-        f"search_document: {operations_title}\n{operations}"
+    title_embedding=list(model.encode(f"search_document: {operations_title}")),
+    content_embedding=list(
+        model.encode(f"search_document: {operations_title}\n{operations}")
    ),
)
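
The `list(model.encode(...))` wrapping above appears motivated by the field type: `SentenceTransformer.encode` returns a `numpy.ndarray`, while the seed document model's embedding fields expect a plain list of floats. A tiny sketch of the difference (the array here is a stand-in for an actual `model.encode(...)` result):

```python
import numpy as np

vec = np.array([0.1, 0.2], dtype=np.float32)  # stand-in for model.encode(...)
print(type(vec))        # <class 'numpy.ndarray'>
print(type(list(vec)))  # <class 'list'> - what a list[float] field expects
```
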
diff --git a/backend/scripts/query_time_check/seed_dummy_docs.py b/backend/scripts/query_time_check/seed_dummy_docs.py
index b94b413e2..000c18543 100644
--- a/backend/scripts/query_time_check/seed_dummy_docs.py
+++ b/backend/scripts/query_time_check/seed_dummy_docs.py
@@ -99,6 +99,7 @@ def generate_dummy_chunk(
        ),
        document_sets={document_set for document_set in document_set_names},
        boost=random.randint(-1, 1),
+        aggregated_boost_factor=random.random(),
        tenant_id=POSTGRES_DEFAULT_SCHEMA,
    )

diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py
index b21c53d69..e2a9798e0 100644
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@@ -25,6 +25,9 @@ CONNECTOR_CLASSIFIER_MODEL_REPO = "Danswer/filter-extraction-model"
CONNECTOR_CLASSIFIER_MODEL_TAG = "1.0.0"
INTENT_MODEL_VERSION = "danswer/hybrid-intent-token-classifier"
INTENT_MODEL_TAG = "v1.0.3"
+CONTENT_MODEL_VERSION = (
+    "sentence-transformers/paraphrase-mpnet-base-v2"  # TODO: replace with a fine-tuned Onyx model
+)

# Bi-Encoder, other details
diff --git a/backend/shared_configs/model_server_models.py b/backend/shared_configs/model_server_models.py
index 644f315fa..41042bf6b 100644
--- a/backend/shared_configs/model_server_models.py
+++ b/backend/shared_configs/model_server_models.py
@@ -73,6 +73,14 @@ class IntentResponse(BaseModel):
    keywords: list[str]


+class ContentClassificationRequests(BaseModel):
+    queries: list[str]
+
+
+class ContentClassificationResponses(BaseModel):
+    content_classifications: list[tuple[int, float]]
+
+
class SupportedEmbeddingModel(BaseModel):
    name: str
    dim: int
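
Note that `ContentClassificationRequests` is defined here but not yet used anywhere in this diff: the endpoint accepts a bare `list[str]` and `ContentClassificationModel.predict` posts a raw JSON list. If the endpoint is later migrated to the request/response models, the handler in `model_server/custom_models.py` could look roughly like this (hypothetical follow-up, reusing the names already defined in that module):

```python
# Hypothetical signature if the currently unused request model is adopted:
@router.post("/content-classification")
async def process_content_classification_request(
    req: ContentClassificationRequests,
) -> ContentClassificationResponses:
    results = run_content_classification_inference(req.queries)
    return ContentClassificationResponses(content_classifications=results)
```
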