Add support for Zendesk Help Center (#661)

This commit is contained in:
Bryan Peterson
2023-11-02 05:11:56 +01:00
committed by GitHub
parent e8f778ccb5
commit 44e3dcb19f
14 changed files with 370 additions and 1 deletions

View File

@@ -70,6 +70,7 @@ class DocumentSource(str, Enum):
DOCUMENT360 = "document360"
GONG = "gong"
GOOGLE_SITES = "google_sites"
ZENDESK = "zendesk"
class DocumentIndexType(str, Enum):

View File

@@ -26,6 +26,7 @@ from danswer.connectors.slack.connector import SlackLoadConnector
from danswer.connectors.slack.connector import SlackPollConnector
from danswer.connectors.web.connector import WebConnector
from danswer.connectors.zulip.connector import ZulipConnector
from danswer.connectors.zendesk.connector import ZendeskConnector
class ConnectorMissingException(Exception):
@@ -58,6 +59,7 @@ def identify_connector_class(
DocumentSource.DOCUMENT360: Document360Connector,
DocumentSource.GONG: GongConnector,
DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
DocumentSource.ZENDESK: ZendeskConnector,
}
connector_by_source = connector_map.get(source, {})

View File

@@ -0,0 +1,63 @@
from typing import Any
from zenpy import Zenpy
from zenpy.lib.api_objects.help_centre_objects import Article
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.models import Document, Section
from danswer.connectors.interfaces import GenerateDocumentsOutput, LoadConnector, PollConnector, SecondsSinceUnixEpoch
class ZendeskClientNotSetUpError(PermissionError):
    """Raised when the Zendesk client is used before it has been created."""

    def __init__(self) -> None:
        # Fixed message; the exception takes no arguments from the caller.
        message = "Zendesk Client is not set up, was load_credentials called?"
        super().__init__(message)
class ZendeskConnector(LoadConnector, PollConnector):
    """Connector that turns Zendesk Help Center articles into documents.

    Call ``load_credentials`` before ``load_from_state`` or ``poll_source``;
    until then no Zendesk client exists and iterating either generator raises
    ``ZendeskClientNotSetUpError``.
    """

    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        """Store the batch size; the client is created by load_credentials.

        Args:
            batch_size: Maximum number of documents yielded per batch.
        """
        self.batch_size = batch_size
        # Stays None until load_credentials() builds the client.
        self.zendesk_client: Zenpy | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Create the Zenpy client from the given credentials.

        Args:
            credentials: Must contain the keys ``zendesk_subdomain``,
                ``zendesk_email`` and ``zendesk_token``.

        Returns:
            Always ``None``.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        self.zendesk_client = Zenpy(
            subdomain=credentials["zendesk_subdomain"],
            email=credentials["zendesk_email"],
            token=credentials["zendesk_token"],
        )
        return None

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield batches for every article (a poll with no time bounds)."""
        return self.poll_source(None, None)

    def _article_to_document(self, article: Article) -> Document:
        """Convert one Help Center article into a ``Document``.

        The document has a single section holding the article body, linked
        to the article's ``html_url``.
        """
        return Document(
            id=f"article:{article.id}",
            sections=[Section(link=article.html_url, text=article.body)],
            source=DocumentSource.ZENDESK,
            semantic_identifier="Article: " + article.title,
            metadata={
                "type": "article",
                "updated_at": article.updated_at,
            },
        )

    def poll_source(
        self,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
    ) -> GenerateDocumentsOutput:
        """Yield Help Center articles as batches of documents.

        Args:
            start: When ``None``, all articles are listed using cursor
                pagination. Otherwise the incremental articles endpoint is
                queried with ``start_time=int(start)``.
            end: Accepted for interface compatibility; currently not used,
                so results are not limited by an upper time bound.

        Yields:
            Lists of at most ``batch_size`` documents. Articles whose body
            is ``None`` are skipped. Each yielded list is a distinct object.

        Raises:
            ZendeskClientNotSetUpError: If ``load_credentials`` has not been
                called. Because this is a generator, the error surfaces when
                iteration starts, not when the method is called.
        """
        if self.zendesk_client is None:
            raise ZendeskClientNotSetUpError()

        help_center = self.zendesk_client.help_center
        if start is None:
            articles = help_center.articles(cursor_pagination=True)
        else:
            articles = help_center.articles.incremental(start_time=int(start))

        doc_batch: list[Document] = []
        for article in articles:
            if article.body is None:
                continue

            doc_batch.append(self._article_to_document(article))
            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                # Rebind instead of clear(): the consumer may still hold the
                # list that was just yielded.
                doc_batch = []

        # Flush the trailing partial batch so the last articles are not lost.
        if doc_batch:
            yield doc_batch

View File

@@ -56,3 +56,4 @@ transformers==4.30.1
uvicorn==0.21.1
zulip==0.8.2
hubspot-api-client==8.1.0
zenpy==2.0.41

View File

@@ -1,5 +1,11 @@
# This file is purely for development use, not included in any builds
import requests
import os
import sys
# Append the parent of this script's directory to sys.path before the danswer
# imports below run — presumably so the package resolves when this script is
# executed directly (TODO confirm the package lives in that directory).
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)
from danswer.configs.app_configs import DOCUMENT_INDEX_NAME
from danswer.document_index.vespa.index import DOCUMENT_ID_ENDPOINT

View File

@@ -1,5 +1,11 @@
import psycopg2
import os
import sys
# Append the parent of this script's directory to sys.path before the danswer
# imports below run — presumably so the package resolves when this script is
# executed directly (TODO confirm the package lives in that directory).
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)
from danswer.configs.app_configs import POSTGRES_DB
from danswer.configs.app_configs import POSTGRES_HOST
from danswer.configs.app_configs import POSTGRES_PASSWORD