Slim connector for Zendesk (#3367)

* Add SlimConnector support for Zendesk

* Zendesk format changes

* code formatting

---------

Co-authored-by: hagen-danswer <hagen@danswer.ai>
This commit is contained in:
SubashMohan 2025-01-06 20:11:41 +05:30 committed by GitHub
parent 7f8194798a
commit c2444a5cff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 153 additions and 1 deletions

View File

@ -26,6 +26,10 @@ env:
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
# Slab
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
# Zendesk
ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
# Salesforce
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}

View File

@ -10,17 +10,21 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
time_str_to_utc,
)
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import Section
from onyx.connectors.models import SlimDocument
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.retry_wrapper import retry_builder
MAX_PAGE_SIZE = 30 # Zendesk API maximum
# Number of SlimDocument entries accumulated by retrieve_all_slim_documents
# before a batch is yielded to the caller.
_SLIM_BATCH_SIZE = 1000
class ZendeskCredentialsNotSetUpError(PermissionError):
@ -272,7 +276,7 @@ def _ticket_to_document(
)
class ZendeskConnector(LoadConnector, PollConnector):
class ZendeskConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(
self,
batch_size: int = INDEX_BATCH_SIZE,
@ -397,6 +401,43 @@ class ZendeskConnector(LoadConnector, PollConnector):
if doc_batch:
yield doc_batch
def retrieve_all_slim_documents(
    self,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> GenerateSlimDocumentOutput:
    """Yield batches of ID-only ``SlimDocument`` objects for the configured content type.

    Article IDs are emitted as ``article:<id>`` and ticket IDs as
    ``zendesk_ticket_<id>``. Each yielded list holds at most
    ``_SLIM_BATCH_SIZE`` entries; a final, shorter list is yielded if any
    entries remain.

    Args:
        start: If truthy, converted to ``int`` and forwarded as ``start_time``
            to the fetch helper; otherwise ``None`` is forwarded.
        end: Accepted for interface compatibility; not used by this method.

    Raises:
        ValueError: If ``self.content_type`` is neither ``"articles"`` nor
            ``"tickets"``.
    """
    kind = self.content_type
    if kind not in ("articles", "tickets"):
        raise ValueError(f"Unsupported content_type: {self.content_type}")

    start_time = int(start) if start else None

    # Build a lazy stream of slim documents for the selected content type;
    # the fetch helper is invoked here, items are consumed in the loop below.
    if kind == "articles":
        slim_docs = (
            SlimDocument(id=f"article:{article['id']}")
            for article in _get_articles(self.client, start_time=start_time)
        )
    else:
        slim_docs = (
            SlimDocument(id=f"zendesk_ticket_{ticket['id']}")
            for ticket in _get_tickets(self.client, start_time=start_time)
        )

    pending: list[SlimDocument] = []
    for slim_doc in slim_docs:
        pending.append(slim_doc)
        if len(pending) >= _SLIM_BATCH_SIZE:
            yield pending
            # Start a fresh list so the batch already handed out is not mutated.
            pending = []

    # Flush whatever is left over after the last full batch.
    if pending:
        yield pending
if __name__ == "__main__":
import os

View File

@ -0,0 +1,96 @@
import json
import os
import time
from pathlib import Path
import pytest
from danswer.configs.constants import DocumentSource
from danswer.connectors.models import Document
from danswer.connectors.zendesk.connector import ZendeskConnector
def load_test_data(file_name: str = "test_zendesk_data.json") -> dict[str, dict]:
    """Load the expected-values JSON file used by the Zendesk connector tests.

    Args:
        file_name: Name of the JSON file, resolved relative to the directory
            containing this module.

    Returns:
        The parsed JSON content.
    """
    fixture_path = Path(__file__).parent / file_name
    # JSON text is UTF-8; be explicit rather than relying on the platform's
    # locale-dependent default encoding.
    with open(fixture_path, "r", encoding="utf-8") as f:
        return json.load(f)
@pytest.fixture
def zendesk_article_connector() -> ZendeskConnector:
    """Provide a ``ZendeskConnector`` for articles with credentials loaded.

    Credentials come from ``get_credentials()``, which reads them from the
    environment.
    """
    article_connector = ZendeskConnector(content_type="articles")
    credentials = get_credentials()
    article_connector.load_credentials(credentials)
    return article_connector
@pytest.fixture
def zendesk_ticket_connector() -> ZendeskConnector:
    """Provide a ``ZendeskConnector`` for tickets with credentials loaded.

    Credentials come from ``get_credentials()``, which reads them from the
    environment.
    """
    ticket_connector = ZendeskConnector(content_type="tickets")
    credentials = get_credentials()
    ticket_connector.load_credentials(credentials)
    return ticket_connector
def get_credentials() -> dict[str, str]:
    """Build the Zendesk credentials dict from environment variables.

    Returns:
        A dict with the keys ``zendesk_subdomain``, ``zendesk_email`` and
        ``zendesk_token``.

    Raises:
        KeyError: If any of ``ZENDESK_SUBDOMAIN``, ``ZENDESK_EMAIL`` or
            ``ZENDESK_TOKEN`` is not set.
    """
    # (credential key, environment variable) pairs, looked up in this order.
    env_var_by_key = (
        ("zendesk_subdomain", "ZENDESK_SUBDOMAIN"),
        ("zendesk_email", "ZENDESK_EMAIL"),
        ("zendesk_token", "ZENDESK_TOKEN"),
    )
    return {key: os.environ[env_var] for key, env_var in env_var_by_key}
@pytest.mark.parametrize(
    "connector_fixture", ["zendesk_article_connector", "zendesk_ticket_connector"]
)
def test_zendesk_connector_basic(
    request: pytest.FixtureRequest, connector_fixture: str
) -> None:
    """Poll the connector and check that the known reference document is returned.

    Runs once per connector fixture (articles and tickets). The expected
    document ID and identifier come from the JSON loaded by
    ``load_test_data()``.
    """
    connector = request.getfixturevalue(connector_fixture)
    test_data = load_test_data()
    is_article_run = connector.content_type == "articles"

    target_test_doc_id: str = (
        f"article:{test_data['article']['id']}"
        if is_article_run
        else f"zendesk_ticket_{test_data['ticket']['id']}"
    )

    # Drain every batch from the poll, then pick out the reference document.
    all_docs: list[Document] = [
        doc for doc_batch in connector.poll_source(0, time.time()) for doc in doc_batch
    ]
    matching_docs = [doc for doc in all_docs if doc.id == target_test_doc_id]
    # If the ID shows up more than once, the last occurrence is the one checked.
    target_doc: Document | None = matching_docs[-1] if matching_docs else None

    assert len(all_docs) > 0, "No documents were retrieved from the connector"
    assert (
        target_doc is not None
    ), "Target document was not found in the retrieved documents"
    assert target_doc.source == DocumentSource.ZENDESK, "Document source is not ZENDESK"

    if is_article_run:
        print(f"target_doc.semantic_identifier {target_doc.semantic_identifier}")
        expected_title = test_data["article"]["semantic_identifier"]
        assert (
            target_doc.semantic_identifier == expected_title
        ), "Article title does not match"
    else:
        expected_prefix = f"Ticket #{test_data['ticket']['id']}"
        assert target_doc.semantic_identifier.startswith(
            expected_prefix
        ), "Ticket ID does not match"
def test_zendesk_connector_slim(zendesk_article_connector: ZendeskConnector) -> None:
    """Check that every fully-loaded article ID also appears in the slim listing."""
    # IDs produced by the full load
    all_full_doc_ids = {
        doc.id
        for doc_batch in zendesk_article_connector.load_from_state()
        for doc in doc_batch
    }

    # IDs produced by the slim (ID-only) retrieval
    all_slim_doc_ids = {
        slim_doc.id
        for slim_doc_batch in zendesk_article_connector.retrieve_all_slim_documents()
        for slim_doc in slim_doc_batch
    }

    # Full docs should be subset of slim docs
    is_subset = all_full_doc_ids.issubset(all_slim_doc_ids)
    assert (
        is_subset
    ), f"Full doc IDs {all_full_doc_ids} not subset of slim doc IDs {all_slim_doc_ids}"

View File

@ -0,0 +1,11 @@
{
"article": {
"id": "17275801227804",
"semantic_identifier": "How can agents leverage knowledge to help customers?"
},
"ticket": {
"id": "1"
}
}