diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index 7ed879299296..b3ee6f56fe14 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -157,8 +157,11 @@ def run_indexing_jobs(db_session: Session) -> None: last_run_time = get_last_successful_attempt_start_time( attempt.connector_id, attempt.credential_id, db_session ) + # Covers very unlikely case that time offset check from DB having tiny variations that coincide with + # a new document being created + safe_last_run_time = max(last_run_time - 1, 0.0) doc_batch_generator = runnable_connector.poll_source( - last_run_time, time.time() + safe_last_run_time, time.time() ) else: diff --git a/backend/danswer/db/engine.py b/backend/danswer/db/engine.py index 97c4e21c51e8..e5e51287b84c 100644 --- a/backend/danswer/db/engine.py +++ b/backend/danswer/db/engine.py @@ -8,6 +8,7 @@ from danswer.configs.app_configs import POSTGRES_HOST from danswer.configs.app_configs import POSTGRES_PASSWORD from danswer.configs.app_configs import POSTGRES_PORT from danswer.configs.app_configs import POSTGRES_USER +from danswer.utils.logger import setup_logger from sqlalchemy import text from sqlalchemy.engine import create_engine from sqlalchemy.engine import Engine @@ -16,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy.orm import Session +logger = setup_logger() SYNC_DB_API = "psycopg2" ASYNC_DB_API = "asyncpg" @@ -28,6 +30,10 @@ _ASYNC_ENGINE: AsyncEngine | None = None def get_db_current_time(db_session: Session) -> datetime: + """Get the current time from Postgres representing the start of the transaction + Within the same transaction this value will not update + This datetime object returned should be timezone aware, default Postgres timezone is UTC + """ result = db_session.execute(text("SELECT NOW()")).scalar() if result is None: raise ValueError("Database did not return a time") @@ -37,9 +43,11 @@ def get_db_current_time(db_session: Session) -> datetime: def translate_db_time_to_server_time( db_time: datetime, db_session: Session ) -> datetime: - server_now = datetime.now() + """If a different database driver is used which does not include timezone info, + this should hit an exception rather than being wrong""" + server_now = datetime.now(timezone.utc) db_now = get_db_current_time(db_session) - time_diff = server_now - db_now.astimezone(timezone.utc).replace(tzinfo=None) + time_diff = server_now - db_now return db_time + time_diff