Warm up models before first document indexed (#333)

This commit is contained in:
Yuhong Sun 2023-08-24 20:01:50 -07:00 committed by GitHub
parent cb13f5b18b
commit 384bf1befe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 1 deletions

View File

@ -35,6 +35,7 @@ from danswer.db.index_attempt import update_docs_indexed
from danswer.db.models import Connector
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.search.search_utils import warm_up_models
from danswer.utils.logger import IndexAttemptSingleton
from danswer.utils.logger import setup_logger
@ -417,4 +418,7 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non
# Script entry point: warm up the models first, then start the indexing loop.
if __name__ == "__main__":
logger.info("Warming up Embedding Model(s)")
# indexer_only=True makes warm_up_models return right after it has run the
# tokenizer and the embedding model, before the reranking ensemble is loaded.
warm_up_models(indexer_only=True)
logger.info("Starting Indexing Loop")
# Called with its defaults (delay=10, num_workers=NUM_INDEXING_WORKERS).
update_loop()

View File

@ -61,10 +61,14 @@ def get_default_intent_model() -> TFDistilBertForSequenceClassification:
return _INTENT_MODEL
def warm_up_models() -> None:
def warm_up_models(indexer_only: bool = False) -> None:
warm_up_str = "Danswer is amazing"
get_default_tokenizer()(warm_up_str)
get_default_embedding_model().encode(warm_up_str)
if indexer_only:
return
cross_encoders = get_default_reranking_model_ensemble()
[
cross_encoder.predict((warm_up_str, warm_up_str))