mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-31 02:01:16 +02:00
Adjust time_updated assignment + increase frozen timeout to 3hrs
This commit is contained in:
parent
e744c6b75a
commit
bfa338e142
@ -164,18 +164,18 @@ def cleanup_indexing_jobs(
|
||||
)
|
||||
for index_attempt in in_progress_indexing_attempts:
|
||||
if index_attempt.id in existing_jobs:
|
||||
# check to see if the job has been updated in the last hour, if not
|
||||
# check to see if the job has been updated in the last 3 hours, if not
|
||||
# assume it to be frozen in some bad state and just mark it as failed. Note: this relies
|
||||
# on the fact that the `time_updated` field is constantly updated every
|
||||
# batch of documents indexed
|
||||
current_db_time = get_db_current_time(db_session=db_session)
|
||||
time_since_update = current_db_time - index_attempt.time_updated
|
||||
if time_since_update.seconds > 60 * 60:
|
||||
if time_since_update.seconds > 3 * 60 * 60:
|
||||
existing_jobs[index_attempt.id].cancel()
|
||||
mark_run_failed(
|
||||
db_session=db_session,
|
||||
index_attempt=index_attempt,
|
||||
failure_reason="Indexing run frozen - no updates in last hour. "
|
||||
failure_reason="Indexing run frozen - no updates in 3 hours. "
|
||||
"The run will be re-attempted at next scheduled indexing time.",
|
||||
)
|
||||
else:
|
||||
@ -298,6 +298,13 @@ def _run_indexing(
|
||||
net_doc_change += new_docs
|
||||
chunk_count += total_batch_chunks
|
||||
document_count += len(doc_batch)
|
||||
|
||||
# commit transaction so that the `update` below begins
|
||||
# with a brand new transaction. Postgres uses the start
|
||||
# of the transaction when computing `NOW()`, so if we have
|
||||
# a long running transaction, the `time_updated` field will
|
||||
# be inaccurate
|
||||
db_session.commit()
|
||||
update_docs_indexed(
|
||||
db_session=db_session,
|
||||
index_attempt=attempt,
|
||||
|
Loading…
x
Reference in New Issue
Block a user