mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-10 05:05:34 +02:00
use redis completion signal to double check exit code (#3435)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
This commit is contained in:
@@ -640,18 +640,41 @@ def connector_indexing_proxy_task(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if job.status == "error":
|
if job.status == "error":
|
||||||
|
ignore_exitcode = False
|
||||||
|
|
||||||
exit_code: int | None = None
|
exit_code: int | None = None
|
||||||
if job.process:
|
if job.process:
|
||||||
exit_code = job.process.exitcode
|
exit_code = job.process.exitcode
|
||||||
task_logger.error(
|
|
||||||
"Indexing watchdog - spawned task exceptioned: "
|
# seeing non-deterministic behavior where spawned tasks occasionally return exit code 1
|
||||||
f"attempt={index_attempt_id} "
|
# even though logging clearly indicates that they completed successfully
|
||||||
f"tenant={tenant_id} "
|
# to work around this, we ignore the job error state if the completion signal is OK
|
||||||
f"cc_pair={cc_pair_id} "
|
status_int = redis_connector_index.get_completion()
|
||||||
f"search_settings={search_settings_id} "
|
if status_int:
|
||||||
f"exit_code={exit_code} "
|
status_enum = HTTPStatus(status_int)
|
||||||
f"error={job.exception()}"
|
if status_enum == HTTPStatus.OK:
|
||||||
)
|
ignore_exitcode = True
|
||||||
|
|
||||||
|
if ignore_exitcode:
|
||||||
|
task_logger.warning(
|
||||||
|
"Indexing watchdog - spawned task has non-zero exit code "
|
||||||
|
"but completion signal is OK. Continuing...: "
|
||||||
|
f"attempt={index_attempt_id} "
|
||||||
|
f"tenant={tenant_id} "
|
||||||
|
f"cc_pair={cc_pair_id} "
|
||||||
|
f"search_settings={search_settings_id} "
|
||||||
|
f"exit_code={exit_code}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
task_logger.error(
|
||||||
|
"Indexing watchdog - spawned task exceptioned: "
|
||||||
|
f"attempt={index_attempt_id} "
|
||||||
|
f"tenant={tenant_id} "
|
||||||
|
f"cc_pair={cc_pair_id} "
|
||||||
|
f"search_settings={search_settings_id} "
|
||||||
|
f"exit_code={exit_code} "
|
||||||
|
f"error={job.exception()}"
|
||||||
|
)
|
||||||
|
|
||||||
job.release()
|
job.release()
|
||||||
break
|
break
|
||||||
|
Reference in New Issue
Block a user