orphan tag cleanup optimization (#4651)

* move orphan tag cleanup to final cleanup section of associated parent tasks

* naming
This commit is contained in:
Evan Lohn
2025-05-02 10:22:59 -07:00
committed by GitHub
parent 6d9693dc51
commit 55e4465782
5 changed files with 13 additions and 0 deletions

View File

@@ -44,6 +44,7 @@ from onyx.db.search_settings import get_all_search_settings
from onyx.db.sync_record import cleanup_sync_records
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.db.tag import delete_orphan_tags__no_commit
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_delete import RedisConnectorDeletePayload
@@ -445,6 +446,9 @@ def monitor_connector_deletion_taskset(
db_session=db_session,
)
# delete orphan tags
delete_orphan_tags__no_commit(db_session)
# Store IDs before potentially expiring cc_pair
connector_id_to_delete = cc_pair.connector_id
credential_id_to_delete = cc_pair.credential_id

View File

@@ -49,6 +49,7 @@ from onyx.db.models import ConnectorCredentialPair
from onyx.db.search_settings import get_current_search_settings
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.db.tag import delete_orphan_tags__no_commit
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_prune import RedisConnectorPrunePayload
@@ -561,6 +562,8 @@ def monitor_ccpair_pruning_taskset(
num_docs_synced=initial,
)
delete_orphan_tags__no_commit(db_session)
redis_connector.prune.taskset_clear()
redis_connector.prune.generator_clear()
redis_connector.prune.set_fence(None)

View File

@@ -148,6 +148,8 @@ def delete_document_tags_for_documents__no_commit(
stmt = delete(Document__Tag).where(Document__Tag.document_id.in_(document_ids))
db_session.execute(stmt)
def delete_orphan_tags__no_commit(db_session: Session) -> None:
orphan_tags_query = select(Tag.id).where(
~db_session.query(Document__Tag.tag_id)
.filter(Document__Tag.tag_id == Tag.id)

View File

@@ -8,6 +8,7 @@ from sqlalchemy.orm import Session
from onyx.db.document import delete_documents_complete__no_commit
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.search_settings import get_active_search_settings
from onyx.db.tag import delete_orphan_tags__no_commit
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
# Modify sys.path
@@ -83,6 +84,7 @@ def _unsafe_deletion(
db_session=db_session,
document_ids=[document.id for document in documents],
)
delete_orphan_tags__no_commit(db_session=db_session)
num_docs_deleted += len(documents)

View File

@@ -16,6 +16,7 @@ from onyx.context.search.models import IndexFilters # noqa: E402
from onyx.document_index.interfaces import VespaChunkRequest # noqa: E402
from onyx.db.engine import get_session_context_manager # noqa: E402
from onyx.db.document import delete_documents_complete__no_commit # noqa: E402
from onyx.db.tag import delete_orphan_tags__no_commit # noqa: E402
from onyx.db.search_settings import get_current_search_settings # noqa: E402
from onyx.document_index.vespa.index import VespaIndex # noqa: E402
from onyx.db.document import get_document # noqa: E402
@@ -128,6 +129,7 @@ def main() -> None:
delete_documents_complete__no_commit(
db_session, successfully_vespa_deleted_doc_ids
)
delete_orphan_tags__no_commit(db_session)
db_session.commit()
except Exception as e:
print(f"Error deleting documents from Postgres: {e}")