mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-18 11:34:12 +02:00
Add option to disable document cleanup
This commit is contained in:
@@ -10,6 +10,7 @@ from danswer.background.connector_deletion import (
|
|||||||
_delete_connector_credential_pair_batch,
|
_delete_connector_credential_pair_batch,
|
||||||
)
|
)
|
||||||
from danswer.background.indexing.checkpointing import get_time_windows_for_index_attempt
|
from danswer.background.indexing.checkpointing import get_time_windows_for_index_attempt
|
||||||
|
from danswer.configs.app_configs import DISABLE_DOCUMENT_CLEANUP
|
||||||
from danswer.configs.app_configs import POLL_CONNECTOR_OFFSET
|
from danswer.configs.app_configs import POLL_CONNECTOR_OFFSET
|
||||||
from danswer.connectors.factory import instantiate_connector
|
from danswer.connectors.factory import instantiate_connector
|
||||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||||
@@ -232,7 +233,7 @@ def _run_indexing(
|
|||||||
docs_removed_from_index=0,
|
docs_removed_from_index=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
if is_listing_complete:
|
if is_listing_complete and not DISABLE_DOCUMENT_CLEANUP:
|
||||||
# clean up all documents from the index that have not been returned from the connector
|
# clean up all documents from the index that have not been returned from the connector
|
||||||
all_indexed_document_ids = {
|
all_indexed_document_ids = {
|
||||||
d.id
|
d.id
|
||||||
|
@@ -196,6 +196,10 @@ ENABLE_MINI_CHUNK = os.environ.get("ENABLE_MINI_CHUNK", "").lower() == "true"
|
|||||||
MINI_CHUNK_SIZE = 150
|
MINI_CHUNK_SIZE = 150
|
||||||
# Timeout to wait for job's last update before killing it, in hours
|
# Timeout to wait for job's last update before killing it, in hours
|
||||||
CLEANUP_INDEXING_JOBS_TIMEOUT = int(os.environ.get("CLEANUP_INDEXING_JOBS_TIMEOUT", 3))
|
CLEANUP_INDEXING_JOBS_TIMEOUT = int(os.environ.get("CLEANUP_INDEXING_JOBS_TIMEOUT", 3))
|
||||||
|
# If set to true, documents that are no longer returned by the connector (i.e. that "no longer exist") will not be cleaned up from the index when running Load connectors
|
||||||
|
DISABLE_DOCUMENT_CLEANUP = (
|
||||||
|
os.environ.get("DISABLE_DOCUMENT_CLEANUP", "").lower() == "true"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
#####
|
#####
|
||||||
|
@@ -140,6 +140,7 @@ services:
|
|||||||
- GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
|
- GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
|
||||||
- NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
|
- NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
|
||||||
- GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}
|
- GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}
|
||||||
|
- DISABLE_DOCUMENT_CLEANUP=${DISABLE_DOCUMENT_CLEANUP:-}
|
||||||
# Danswer SlackBot Configs
|
# Danswer SlackBot Configs
|
||||||
- DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-}
|
- DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-}
|
||||||
- DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-}
|
- DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-}
|
||||||
|
Reference in New Issue
Block a user