From 2ff207218ef1d2d838202464e109a25bd839d408 Mon Sep 17 00:00:00 2001 From: mattboret Date: Fri, 3 May 2024 18:04:09 +0200 Subject: [PATCH] Confluence: Add config to index only active pages (#1348) Co-authored-by: Matthieu Boret --- backend/danswer/configs/app_configs.py | 7 ++++++- backend/danswer/connectors/confluence/connector.py | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index 809e636651..4dbae24952 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -3,7 +3,6 @@ import os from danswer.configs.constants import AuthType from danswer.configs.constants import DocumentIndexType - ##### # App Configs ##### @@ -167,6 +166,12 @@ CONFLUENCE_CONNECTOR_LABELS_TO_SKIP = [ ) if ignored_tag ] + +# Skip archived pages: when enabled, only pages with status "current" are indexed +CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES = ( + os.environ.get("CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES", "").lower() == "true" +) + JIRA_CONNECTOR_LABELS_TO_SKIP = [ ignored_tag for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",") diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index f9f5e7c3bb..b0272d44cb 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -11,6 +11,7 @@ import bs4 from atlassian import Confluence # type:ignore from requests import HTTPError +from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE from danswer.configs.app_configs import INDEX_BATCH_SIZE @@ -219,6 +220,9 @@ class ConfluenceConnector(LoadConnector, PollConnector): self.space, start=start_ind, limit=batch_size, + status="current" + if 
CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES + else None, expand="body.storage.value,version", ) except Exception: @@ -237,6 +241,9 @@ class ConfluenceConnector(LoadConnector, PollConnector): self.space, start=start_ind + i, limit=1, + status="current" + if CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES + else None, expand="body.storage.value,version", ) )