Slim connector for Zendesk (#3367)
* Add SlimConnector support for Zendesk
* Zendesk format changes
* Code formatting

Co-authored-by: hagen-danswer <hagen@danswer.ai>
commit c2444a5cff (parent 7f8194798a)
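The point of a slim pass is to enumerate what exists in Zendesk cheaply, yielding ID-only records instead of fully parsed documents (typically so that pruning or permission jobs can reconcile stale entries). A minimal sketch of that distinction, assuming only what the diff itself shows about the models (SlimDocument is constructed with just an id, while Document carries sections, source, and a semantic identifier):

    # Minimal sketch, not upstream code: contrasts the id-only payload yielded by
    # the new retrieve_all_slim_documents() with the full Document objects that
    # load_from_state()/poll_source() produce.
    from onyx.connectors.models import Document
    from onyx.connectors.models import SlimDocument

    def to_slim(doc: Document) -> SlimDocument:
        # A slim record keeps just enough to check later whether the source
        # document still exists.
        return SlimDocument(id=doc.id)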
@@ -26,6 +26,10 @@ env:
       GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
       # Slab
       SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
+      # Zendesk
+      ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
+      ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
+      ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
       # Salesforce
       SF_USERNAME: ${{ secrets.SF_USERNAME }}
       SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
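These repository secrets feed the new Zendesk integration test through environment variables. A hedged sketch of how they reach the connector, mirroring the test's get_credentials() helper further down (the credential keys are taken from that test, not from this workflow file):

    import os

    from danswer.connectors.zendesk.connector import ZendeskConnector

    # Illustrative only: assemble the credentials dict the connector expects from
    # the same variables the workflow exports as secrets, then hand it over.
    credentials = {
        "zendesk_subdomain": os.environ["ZENDESK_SUBDOMAIN"],
        "zendesk_email": os.environ["ZENDESK_EMAIL"],
        "zendesk_token": os.environ["ZENDESK_TOKEN"],
    }
    connector = ZendeskConnector(content_type="articles")
    connector.load_credentials(credentials)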
@@ -10,17 +10,21 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
     time_str_to_utc,
 )
 from onyx.connectors.interfaces import GenerateDocumentsOutput
+from onyx.connectors.interfaces import GenerateSlimDocumentOutput
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
+from onyx.connectors.interfaces import SlimConnector
 from onyx.connectors.models import BasicExpertInfo
 from onyx.connectors.models import Document
 from onyx.connectors.models import Section
+from onyx.connectors.models import SlimDocument
 from onyx.file_processing.html_utils import parse_html_page_basic
 from onyx.utils.retry_wrapper import retry_builder


 MAX_PAGE_SIZE = 30  # Zendesk API maximum
+_SLIM_BATCH_SIZE = 1000


 class ZendeskCredentialsNotSetUpError(PermissionError):
@@ -272,7 +276,7 @@ def _ticket_to_document(
     )


-class ZendeskConnector(LoadConnector, PollConnector):
+class ZendeskConnector(LoadConnector, PollConnector, SlimConnector):
     def __init__(
         self,
         batch_size: int = INDEX_BATCH_SIZE,
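Adding SlimConnector to the base classes also lets generic code discover slim support at runtime rather than special-casing Zendesk. A minimal, illustrative check (the helper name is hypothetical; only the SlimConnector import comes from this diff):

    from onyx.connectors.interfaces import SlimConnector

    def supports_slim_retrieval(connector: object) -> bool:
        # Any connector subclassing SlimConnector exposes retrieve_all_slim_documents().
        return isinstance(connector, SlimConnector)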
@@ -397,6 +401,43 @@ class ZendeskConnector(LoadConnector, PollConnector):
         if doc_batch:
             yield doc_batch

+    def retrieve_all_slim_documents(
+        self,
+        start: SecondsSinceUnixEpoch | None = None,
+        end: SecondsSinceUnixEpoch | None = None,
+    ) -> GenerateSlimDocumentOutput:
+        slim_doc_batch: list[SlimDocument] = []
+        if self.content_type == "articles":
+            articles = _get_articles(
+                self.client, start_time=int(start) if start else None
+            )
+            for article in articles:
+                slim_doc_batch.append(
+                    SlimDocument(
+                        id=f"article:{article['id']}",
+                    )
+                )
+                if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:
+                    yield slim_doc_batch
+                    slim_doc_batch = []
+        elif self.content_type == "tickets":
+            tickets = _get_tickets(
+                self.client, start_time=int(start) if start else None
+            )
+            for ticket in tickets:
+                slim_doc_batch.append(
+                    SlimDocument(
+                        id=f"zendesk_ticket_{ticket['id']}",
+                    )
+                )
+                if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:
+                    yield slim_doc_batch
+                    slim_doc_batch = []
+        else:
+            raise ValueError(f"Unsupported content_type: {self.content_type}")
+        if slim_doc_batch:
+            yield slim_doc_batch
+


 if __name__ == "__main__":
     import os
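A short usage sketch of the new generator, adapted from the slim test added below; it assumes a ZendeskConnector whose credentials are already loaded (see the earlier sketch after the workflow hunk):

    # Illustrative only: collect every slim document ID. Batches hold at most
    # _SLIM_BATCH_SIZE (1000) SlimDocument records each.
    all_slim_ids: set[str] = set()
    for slim_batch in connector.retrieve_all_slim_documents():
        all_slim_ids.update(slim_doc.id for slim_doc in slim_batch)
    print(f"Zendesk currently exposes {len(all_slim_ids)} documents")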
@@ -0,0 +1,96 @@
import json
import os
import time
from pathlib import Path

import pytest

from danswer.configs.constants import DocumentSource
from danswer.connectors.models import Document
from danswer.connectors.zendesk.connector import ZendeskConnector


def load_test_data(file_name: str = "test_zendesk_data.json") -> dict[str, dict]:
    current_dir = Path(__file__).parent
    with open(current_dir / file_name, "r") as f:
        return json.load(f)


@pytest.fixture
def zendesk_article_connector() -> ZendeskConnector:
    connector = ZendeskConnector(content_type="articles")
    connector.load_credentials(get_credentials())
    return connector


@pytest.fixture
def zendesk_ticket_connector() -> ZendeskConnector:
    connector = ZendeskConnector(content_type="tickets")
    connector.load_credentials(get_credentials())
    return connector


def get_credentials() -> dict[str, str]:
    return {
        "zendesk_subdomain": os.environ["ZENDESK_SUBDOMAIN"],
        "zendesk_email": os.environ["ZENDESK_EMAIL"],
        "zendesk_token": os.environ["ZENDESK_TOKEN"],
    }


@pytest.mark.parametrize(
    "connector_fixture", ["zendesk_article_connector", "zendesk_ticket_connector"]
)
def test_zendesk_connector_basic(
    request: pytest.FixtureRequest, connector_fixture: str
) -> None:
    connector = request.getfixturevalue(connector_fixture)
    test_data = load_test_data()
    all_docs: list[Document] = []
    target_test_doc_id: str
    if connector.content_type == "articles":
        target_test_doc_id = f"article:{test_data['article']['id']}"
    else:
        target_test_doc_id = f"zendesk_ticket_{test_data['ticket']['id']}"

    target_doc: Document | None = None

    for doc_batch in connector.poll_source(0, time.time()):
        for doc in doc_batch:
            all_docs.append(doc)
            if doc.id == target_test_doc_id:
                target_doc = doc

    assert len(all_docs) > 0, "No documents were retrieved from the connector"
    assert (
        target_doc is not None
    ), "Target document was not found in the retrieved documents"
    assert target_doc.source == DocumentSource.ZENDESK, "Document source is not ZENDESK"

    if connector.content_type == "articles":
        print(f"target_doc.semantic_identifier {target_doc.semantic_identifier}")
        assert (
            target_doc.semantic_identifier
            == test_data["article"]["semantic_identifier"]
        ), "Article title does not match"
    else:
        assert target_doc.semantic_identifier.startswith(
            f"Ticket #{test_data['ticket']['id']}"
        ), "Ticket ID does not match"


def test_zendesk_connector_slim(zendesk_article_connector: ZendeskConnector) -> None:
    # Get full doc IDs
    all_full_doc_ids = set()
    for doc_batch in zendesk_article_connector.load_from_state():
        all_full_doc_ids.update([doc.id for doc in doc_batch])

    # Get slim doc IDs
    all_slim_doc_ids = set()
    for slim_doc_batch in zendesk_article_connector.retrieve_all_slim_documents():
        all_slim_doc_ids.update([doc.id for doc in slim_doc_batch])

    # Full docs should be subset of slim docs
    assert all_full_doc_ids.issubset(
        all_slim_doc_ids
    ), f"Full doc IDs {all_full_doc_ids} not subset of slim doc IDs {all_slim_doc_ids}"
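The subset assertion at the end of test_zendesk_connector_slim only holds while the slim IDs keep exactly the same format as the IDs on full documents. The helper below is hypothetical, simply restating the two formats visible in the connector diff (f"article:{id}" for articles, f"zendesk_ticket_{id}" for tickets):

    # Hypothetical restatement of the ID contract the slim test depends on; it is
    # not part of the connector code.
    def expected_doc_id(content_type: str, zendesk_id: str | int) -> str:
        if content_type == "articles":
            return f"article:{zendesk_id}"
        if content_type == "tickets":
            return f"zendesk_ticket_{zendesk_id}"
        raise ValueError(f"Unsupported content_type: {content_type}")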
@@ -0,0 +1,11 @@
{
    "article": {
        "id": "17275801227804",
        "semantic_identifier": "How can agents leverage knowledge to help customers?"

    },
    "ticket": {
        "id": "1"

    }
}