diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index df287b58d7b3..ccebfdb213d2 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -223,6 +223,14 @@ MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE = int(
     os.environ.get("MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE", 0)
 )
 
+# comma delimited list of zendesk article labels to skip indexing for;
+# entries are stripped and empty ones dropped so an unset variable yields []
+ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS = [
+    label.strip()
+    for label in os.environ.get("ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS", "").split(",")
+    if label.strip()
+]
+
 
 #####
 # Indexing Configs
diff --git a/backend/danswer/connectors/zendesk/connector.py b/backend/danswer/connectors/zendesk/connector.py
index fc9b703c6ec9..287d7c30c5d4 100644
--- a/backend/danswer/connectors/zendesk/connector.py
+++ b/backend/danswer/connectors/zendesk/connector.py
@@ -4,6 +4,7 @@ from zenpy import Zenpy  # type: ignore
 from zenpy.lib.api_objects.help_centre_objects import Article  # type: ignore
 
 from danswer.configs.app_configs import INDEX_BATCH_SIZE
+from danswer.configs.app_configs import ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS
 from danswer.configs.constants import DocumentSource
 from danswer.connectors.cross_connector_utils.miscellaneous_utils import (
     time_str_to_utc,
@@ -81,7 +82,16 @@ class ZendeskConnector(LoadConnector, PollConnector):
         )
         doc_batch = []
         for article in articles:
-            if article.body is None or article.draft:
+            if (
+                article.body is None
+                or article.draft
+                # label_names is guarded with `or []` in case it is None
+                # (presumably possible when Zendesk omits labels; TODO confirm)
+                or any(
+                    label in ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS
+                    for label in article.label_names or []
+                )
+            ):
                 continue
 
             doc_batch.append(_article_to_document(article))