Mirror of https://github.com/danswer-ai/danswer.git, synced 2025-09-29 05:15:12 +02:00
Feature/log despam (#2022)
* Move a lot of log spam to debug level; consolidate some info-level logging
* Reformat more indexing logging
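At a glance, the recurring pattern is demoting per-cycle chatter from INFO to DEBUG while keeping one-off lifecycle events at INFO. A minimal sketch of the effect, using only Python's standard logging module (Danswer's own logger setup is not shown here):

    import logging

    # Handler level mirrors a typical deployment: INFO and above are emitted.
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    logger = logging.getLogger("indexing")

    logger.debug("Running update, current UTC time: ...")  # suppressed at INFO
    logger.info("Connector succeeded: docs=10 chunks=120 elapsed=4.21s")  # shown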
@@ -277,11 +277,9 @@ def _run_indexing(
             run_dt=run_end_dt,
         )
 
+    elapsed_time = time.time() - start_time
+
     logger.info(
-        f"Indexed or refreshed {document_count} total documents for a total of {chunk_count} indexed chunks"
-    )
-    logger.info(
-        f"Connector successfully finished, elapsed time: {time.time() - start_time} seconds"
+        f"Connector succeeded: docs={document_count} chunks={chunk_count} elapsed={elapsed_time:.2f}s"
     )
 
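The hunk above folds two consecutive INFO messages into one summary line and captures the elapsed time once so it can be formatted consistently. A self-contained sketch of the same pattern; the function and variable names below are illustrative stand-ins, not the actual _run_indexing internals:

    import logging
    import time

    logger = logging.getLogger("indexing")

    def run_indexing_sketch(document_count: int, chunk_count: int) -> None:
        start_time = time.time()
        # ... indexing work would happen here ...
        elapsed_time = time.time() - start_time
        # One consolidated summary line instead of two separate INFO messages.
        logger.info(
            f"Connector succeeded: docs={document_count} "
            f"chunks={chunk_count} elapsed={elapsed_time:.2f}s"
        )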
@@ -330,17 +328,19 @@ def run_indexing_entrypoint(index_attempt_id: int, is_ee: bool = False) -> None:
         attempt = _prepare_index_attempt(db_session, index_attempt_id)
 
         logger.info(
-            f"Running indexing attempt for connector: '{attempt.connector_credential_pair.connector.name}', "
-            f"with config: '{attempt.connector_credential_pair.connector.connector_specific_config}', and "
-            f"with credentials: '{attempt.connector_credential_pair.connector_id}'"
+            f"Indexing starting: "
+            f"connector='{attempt.connector_credential_pair.connector.name}' "
+            f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
+            f"credentials='{attempt.connector_credential_pair.connector_id}'"
         )
 
         _run_indexing(db_session, attempt)
 
         logger.info(
-            f"Completed indexing attempt for connector: '{attempt.connector_credential_pair.connector.name}', "
-            f"with config: '{attempt.connector_credential_pair.connector.connector_specific_config}', and "
-            f"with credentials: '{attempt.connector_credential_pair.connector_id}'"
+            f"Indexing finished: "
+            f"connector='{attempt.connector_credential_pair.connector.name}' "
+            f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
+            f"credentials='{attempt.connector_credential_pair.connector_id}'"
         )
     except Exception as e:
         logger.exception(f"Indexing job with ID '{index_attempt_id}' failed due to {e}")
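The rewritten messages use a key='value' layout, which is easier to grep and to parse mechanically than the old prose-style wording. For example, a downstream script could recover the fields like this (the sample line is illustrative):

    import re

    line = "Indexing starting: connector='Web' config='{}' credentials='42'"
    # Pull key='value' pairs out of the message body.
    pairs = dict(re.findall(r"(\w+)='([^']*)'", line))
    assert pairs["connector"] == "Web"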
@@ -283,11 +283,13 @@ def kickoff_indexing_jobs(
         if attempt.id not in existing_jobs
     ]
 
-    logger.info(f"Found {len(new_indexing_attempts)} new indexing tasks.")
+    logger.debug(f"Found {len(new_indexing_attempts)} new indexing task(s).")
 
     if not new_indexing_attempts:
         return existing_jobs
 
+    indexing_attempt_count = 0
+
     for attempt, embedding_model in new_indexing_attempts:
         use_secondary_index = (
             embedding_model.status == IndexModelStatus.FUTURE
@@ -329,15 +331,29 @@ def kickoff_indexing_jobs(
             )
 
         if run:
-            secondary_str = "(secondary index) " if use_secondary_index else ""
+            if indexing_attempt_count == 0:
+                logger.info(
+                    f"Indexing dispatch starts: pending={len(new_indexing_attempts)}"
+                )
+
+            indexing_attempt_count += 1
+            secondary_str = " (secondary index)" if use_secondary_index else ""
             logger.info(
-                f"Kicked off {secondary_str}"
-                f"indexing attempt for connector: '{attempt.connector_credential_pair.connector.name}', "
-                f"with config: '{attempt.connector_credential_pair.connector.connector_specific_config}', and "
-                f"with credentials: '{attempt.connector_credential_pair.credential_id}'"
+                f"Indexing dispatched{secondary_str}: "
+                f"connector='{attempt.connector_credential_pair.connector.name}' "
+                f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
+                f"credentials='{attempt.connector_credential_pair.credential_id}'"
             )
             existing_jobs_copy[attempt.id] = run
 
+    if indexing_attempt_count > 0:
+        logger.info(
+            f"Indexing dispatch results: "
+            f"initial_pending={len(new_indexing_attempts)} "
+            f"started={indexing_attempt_count} "
+            f"remaining={len(new_indexing_attempts) - indexing_attempt_count}"
+        )
+
     return existing_jobs_copy
 
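Together with the counter initialized in the previous hunk, this implements a three-part pattern: announce the batch once on the first successful dispatch, count each dispatch, then emit a single summary, so idle cycles log nothing at INFO. A standalone distillation of that control flow (names are hypothetical, not the actual kickoff_indexing_jobs signature):

    import logging

    logger = logging.getLogger("indexing")

    def dispatch_sketch(pending: list[str]) -> None:
        started = 0
        for name in pending:
            ok = True  # stand-in for the actual dispatch attempt
            if ok:
                if started == 0:
                    # First successful dispatch: announce the batch once.
                    logger.info(f"Indexing dispatch starts: pending={len(pending)}")
                started += 1
                logger.info(f"Indexing dispatched: connector='{name}'")
        if started > 0:
            # One summary line; nothing is logged when no work was started.
            logger.info(
                f"Indexing dispatch results: initial_pending={len(pending)} "
                f"started={started} remaining={len(pending) - started}"
            )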
@@ -355,7 +371,7 @@ def update_loop(
     # batch of documents indexed
 
     if db_embedding_model.cloud_provider_id is None:
-        logger.info("Running a first inference to warm up embedding model")
+        logger.debug("Running a first inference to warm up embedding model")
         warm_up_encoders(
             embedding_model=db_embedding_model,
             model_server_host=INDEXING_MODEL_SERVER_HOST,
@@ -392,11 +408,11 @@ def update_loop(
     while True:
         start = time.time()
         start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")
-        logger.info(f"Running update, current UTC time: {start_time_utc}")
+        logger.debug(f"Running update, current UTC time: {start_time_utc}")
 
         if existing_jobs:
             # TODO: make this debug level once the "no jobs are being scheduled" issue is resolved
-            logger.info(
+            logger.debug(
                 "Found existing indexing jobs: "
                 f"{[(attempt_id, job.status) for attempt_id, job in existing_jobs.items()]}"
             )
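Messages demoted here remain recoverable when debugging. One common arrangement, shown purely as an illustration and not necessarily how Danswer configures its logger, is to derive the root level from an environment variable (LOG_LEVEL below is an assumed name):

    import logging
    import os

    # LOG_LEVEL is an assumed variable name used here only for illustration.
    level_name = os.environ.get("LOG_LEVEL", "INFO").upper()
    logging.basicConfig(level=getattr(logging, level_name, logging.INFO))

    logging.getLogger("indexing").debug("visible only when LOG_LEVEL=DEBUG")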
@@ -422,7 +438,7 @@ def update__main() -> None:
     set_is_ee_based_on_env_variable()
     init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME)
 
-    logger.info("Starting Indexing Loop")
+    logger.info("Starting indexing service")
     update_loop()
 
@@ -217,7 +217,7 @@ def warm_up_encoders(
     )
 
     # May not be the exact same tokenizer used for the indexing flow
-    logger.info(f"Warming up encoder model: {model_name}")
+    logger.debug(f"Warming up encoder model: {model_name}")
     get_tokenizer(model_name=model_name, provider_type=provider_type).encode(
         warm_up_str
     )