Mirror of https://github.com/danswer-ai/danswer.git
Feature/log despam (#2022)
* Move a lot of log spam to debug level; consolidate some info-level logging
* Reformat more indexing logging
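For context, the practical effect of demoting a call from logger.info to logger.debug: with the effective log level at INFO (a common production default; the actual logger setup in this repo may differ), debug-level lines are filtered out entirely. A minimal stdlib sketch of that behavior:

    import logging

    # Assumed INFO default for illustration; the repo's logger setup may differ.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("indexing")

    logger.debug("Running update, current UTC time: ...")  # suppressed at INFO
    logger.info("Connector succeeded: docs=10 chunks=120 elapsed=4.21s")  # still emitted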
@@ -277,11 +277,9 @@ def _run_indexing(
         run_dt=run_end_dt,
     )
 
-    logger.info(
-        f"Indexed or refreshed {document_count} total documents for a total of {chunk_count} indexed chunks"
-    )
+    elapsed_time = time.time() - start_time
     logger.info(
-        f"Connector successfully finished, elapsed time: {time.time() - start_time} seconds"
+        f"Connector succeeded: docs={document_count} chunks={chunk_count} elapsed={elapsed_time:.2f}s"
     )


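Worth noting why the new version binds elapsed_time before logging: the old message recomputed time.time() - start_time inside the f-string, so the logged value could differ slightly from any other use of the elapsed time, and it printed a raw float. A self-contained sketch of the pattern (run_connector and the counts are hypothetical stand-ins, not code from the repo):

    import logging
    import time

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("indexing")

    def run_connector() -> tuple[int, int]:
        # Hypothetical stand-in for the real indexing work.
        return 10, 120  # document_count, chunk_count

    start_time = time.time()
    document_count, chunk_count = run_connector()
    # Bind the elapsed time once so every consumer of the value agrees,
    # and format to two decimals rather than printing a raw float.
    elapsed_time = time.time() - start_time
    logger.info(
        f"Connector succeeded: docs={document_count} chunks={chunk_count} elapsed={elapsed_time:.2f}s"
    )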
@@ -330,17 +328,19 @@ def run_indexing_entrypoint(index_attempt_id: int, is_ee: bool = False) -> None:
         attempt = _prepare_index_attempt(db_session, index_attempt_id)
 
         logger.info(
-            f"Running indexing attempt for connector: '{attempt.connector_credential_pair.connector.name}', "
-            f"with config: '{attempt.connector_credential_pair.connector.connector_specific_config}', and "
-            f"with credentials: '{attempt.connector_credential_pair.connector_id}'"
+            f"Indexing starting: "
+            f"connector='{attempt.connector_credential_pair.connector.name}' "
+            f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
+            f"credentials='{attempt.connector_credential_pair.connector_id}'"
         )
 
         _run_indexing(db_session, attempt)
 
         logger.info(
-            f"Completed indexing attempt for connector: '{attempt.connector_credential_pair.connector.name}', "
-            f"with config: '{attempt.connector_credential_pair.connector.connector_specific_config}', and "
-            f"with credentials: '{attempt.connector_credential_pair.connector_id}'"
+            f"Indexing finished: "
+            f"connector='{attempt.connector_credential_pair.connector.name}' "
+            f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
+            f"credentials='{attempt.connector_credential_pair.connector_id}'"
         )
     except Exception as e:
         logger.exception(f"Indexing job with ID '{index_attempt_id}' failed due to {e}")
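The rewritten messages switch from prose ("Running indexing attempt for connector: X, with config: Y, and with credentials: Z") to a greppable key='value' style. One way to keep that style consistent across messages is a small formatting helper; the kv function and the field values below are purely illustrative, not code from the repo:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("indexing")

    def kv(**fields: object) -> str:
        # Render fields as space-separated key='value' pairs.
        return " ".join(f"{key}='{value}'" for key, value in fields.items())

    logger.info("Indexing starting: " + kv(connector="Web", config={"base_url": "..."}, credentials=1))
    # -> Indexing starting: connector='Web' config='{'base_url': '...'}' credentials='1'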
@@ -283,11 +283,13 @@ def kickoff_indexing_jobs(
         if attempt.id not in existing_jobs
     ]
 
-    logger.info(f"Found {len(new_indexing_attempts)} new indexing tasks.")
+    logger.debug(f"Found {len(new_indexing_attempts)} new indexing task(s).")
 
     if not new_indexing_attempts:
         return existing_jobs
 
+    indexing_attempt_count = 0
+
     for attempt, embedding_model in new_indexing_attempts:
         use_secondary_index = (
             embedding_model.status == IndexModelStatus.FUTURE
@@ -329,15 +331,29 @@ def kickoff_indexing_jobs(
         )
 
         if run:
-            secondary_str = "(secondary index) " if use_secondary_index else ""
+            if indexing_attempt_count == 0:
+                logger.info(
+                    f"Indexing dispatch starts: pending={len(new_indexing_attempts)}"
+                )
+
+            indexing_attempt_count += 1
+            secondary_str = " (secondary index)" if use_secondary_index else ""
             logger.info(
-                f"Kicked off {secondary_str}"
-                f"indexing attempt for connector: '{attempt.connector_credential_pair.connector.name}', "
-                f"with config: '{attempt.connector_credential_pair.connector.connector_specific_config}', and "
-                f"with credentials: '{attempt.connector_credential_pair.credential_id}'"
+                f"Indexing dispatched{secondary_str}: "
+                f"connector='{attempt.connector_credential_pair.connector.name}' "
+                f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
+                f"credentials='{attempt.connector_credential_pair.credential_id}'"
             )
             existing_jobs_copy[attempt.id] = run
 
+    if indexing_attempt_count > 0:
+        logger.info(
+            f"Indexing dispatch results: "
+            f"initial_pending={len(new_indexing_attempts)} "
+            f"started={indexing_attempt_count} "
+            f"remaining={len(new_indexing_attempts) - indexing_attempt_count}"
+        )
+
     return existing_jobs_copy


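The new counter logic gives kickoff_indexing_jobs quiet idle passes: a banner line is emitted only before the first successful dispatch, one info line per dispatched attempt, and a summary only if anything actually started. A stripped-down sketch of the same pattern (try_dispatch and the connector names are hypothetical, not the repo's API):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("indexing")

    def try_dispatch(name: str) -> bool:
        # Hypothetical stand-in for submitting an attempt to the job client.
        return True

    def kickoff(pending: list[str]) -> None:
        started = 0
        for name in pending:
            if not try_dispatch(name):
                continue
            if started == 0:
                # Banner appears only when at least one attempt actually starts,
                # so an idle scheduler pass logs nothing at info level.
                logger.info(f"Indexing dispatch starts: pending={len(pending)}")
            started += 1
            logger.info(f"Indexing dispatched: connector='{name}'")
        if started > 0:
            logger.info(
                f"Indexing dispatch results: initial_pending={len(pending)} "
                f"started={started} remaining={len(pending) - started}"
            )

    kickoff(["Web", "GitHub"])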
@@ -355,7 +371,7 @@ def update_loop(
     # batch of documents indexed
 
     if db_embedding_model.cloud_provider_id is None:
-        logger.info("Running a first inference to warm up embedding model")
+        logger.debug("Running a first inference to warm up embedding model")
         warm_up_encoders(
             embedding_model=db_embedding_model,
             model_server_host=INDEXING_MODEL_SERVER_HOST,
@@ -392,11 +408,11 @@ def update_loop(
     while True:
         start = time.time()
         start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")
-        logger.info(f"Running update, current UTC time: {start_time_utc}")
+        logger.debug(f"Running update, current UTC time: {start_time_utc}")
 
         if existing_jobs:
             # TODO: make this debug level once the "no jobs are being scheduled" issue is resolved
-            logger.info(
+            logger.debug(
                 "Found existing indexing jobs: "
                 f"{[(attempt_id, job.status) for attempt_id, job in existing_jobs.items()]}"
             )
@@ -422,7 +438,7 @@ def update__main() -> None:
     set_is_ee_based_on_env_variable()
     init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME)
 
-    logger.info("Starting Indexing Loop")
+    logger.info("Starting indexing service")
     update_loop()


@@ -217,7 +217,7 @@ def warm_up_encoders(
     )
 
     # May not be the exact same tokenizer used for the indexing flow
-    logger.info(f"Warming up encoder model: {model_name}")
+    logger.debug(f"Warming up encoder model: {model_name}")
     get_tokenizer(model_name=model_name, provider_type=provider_type).encode(
         warm_up_str
     )