mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-03 03:31:09 +02:00
Slim connector for Zendesk (#3367)
* Add SlimConnector support for Zendesk * Zendesk format changes * code formatting --------- Co-authored-by: hagen-danswer <hagen@danswer.ai>
This commit is contained in:
parent
7f8194798a
commit
c2444a5cff
@ -26,6 +26,10 @@ env:
|
|||||||
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
|
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
|
||||||
# Slab
|
# Slab
|
||||||
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
|
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
|
||||||
|
# Zendesk
|
||||||
|
ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
|
||||||
|
ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
|
||||||
|
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
|
||||||
# Salesforce
|
# Salesforce
|
||||||
SF_USERNAME: ${{ secrets.SF_USERNAME }}
|
SF_USERNAME: ${{ secrets.SF_USERNAME }}
|
||||||
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
|
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
|
||||||
|
@ -10,17 +10,21 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
|||||||
time_str_to_utc,
|
time_str_to_utc,
|
||||||
)
|
)
|
||||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||||
|
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||||
from onyx.connectors.interfaces import LoadConnector
|
from onyx.connectors.interfaces import LoadConnector
|
||||||
from onyx.connectors.interfaces import PollConnector
|
from onyx.connectors.interfaces import PollConnector
|
||||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
|
from onyx.connectors.interfaces import SlimConnector
|
||||||
from onyx.connectors.models import BasicExpertInfo
|
from onyx.connectors.models import BasicExpertInfo
|
||||||
from onyx.connectors.models import Document
|
from onyx.connectors.models import Document
|
||||||
from onyx.connectors.models import Section
|
from onyx.connectors.models import Section
|
||||||
|
from onyx.connectors.models import SlimDocument
|
||||||
from onyx.file_processing.html_utils import parse_html_page_basic
|
from onyx.file_processing.html_utils import parse_html_page_basic
|
||||||
from onyx.utils.retry_wrapper import retry_builder
|
from onyx.utils.retry_wrapper import retry_builder
|
||||||
|
|
||||||
|
|
||||||
MAX_PAGE_SIZE = 30 # Zendesk API maximum
|
MAX_PAGE_SIZE = 30 # Zendesk API maximum
|
||||||
|
_SLIM_BATCH_SIZE = 1000
|
||||||
|
|
||||||
|
|
||||||
class ZendeskCredentialsNotSetUpError(PermissionError):
|
class ZendeskCredentialsNotSetUpError(PermissionError):
|
||||||
@ -272,7 +276,7 @@ def _ticket_to_document(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ZendeskConnector(LoadConnector, PollConnector):
|
class ZendeskConnector(LoadConnector, PollConnector, SlimConnector):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
batch_size: int = INDEX_BATCH_SIZE,
|
batch_size: int = INDEX_BATCH_SIZE,
|
||||||
@ -397,6 +401,43 @@ class ZendeskConnector(LoadConnector, PollConnector):
|
|||||||
if doc_batch:
|
if doc_batch:
|
||||||
yield doc_batch
|
yield doc_batch
|
||||||
|
|
||||||
|
def retrieve_all_slim_documents(
    self,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> GenerateSlimDocumentOutput:
    """Yield ID-only SlimDocuments for the configured content type, in batches.

    Args:
        start: Optional lower time bound. When truthy it is truncated to an
            int and passed as ``start_time`` to the article/ticket fetcher;
            ``None`` (or 0) passes ``None`` instead.
        end: Accepted for signature compatibility; not used by this method.

    Yields:
        Lists of SlimDocument holding at most ``_SLIM_BATCH_SIZE`` entries.
        IDs are ``article:<id>`` for articles and ``zendesk_ticket_<id>``
        for tickets.

    Raises:
        ValueError: If ``self.content_type`` is neither "articles" nor
            "tickets". Because this is a generator, the error surfaces when
            iteration starts, not when the method is called.
    """
    content_type = self.content_type
    if content_type != "articles" and content_type != "tickets":
        raise ValueError(f"Unsupported content_type: {self.content_type}")

    since = int(start) if start else None

    # Build a lazy stream of document IDs; only the ID format and the fetch
    # helper differ between the two content types.
    if content_type == "articles":
        doc_ids = (
            f"article:{article['id']}"
            for article in _get_articles(self.client, start_time=since)
        )
    else:
        doc_ids = (
            f"zendesk_ticket_{ticket['id']}"
            for ticket in _get_tickets(self.client, start_time=since)
        )

    pending: list[SlimDocument] = []
    for doc_id in doc_ids:
        pending.append(SlimDocument(id=doc_id))
        if len(pending) >= _SLIM_BATCH_SIZE:
            yield pending
            pending = []

    # Flush whatever is left over after the last full batch.
    if pending:
        yield pending
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import os
|
import os
|
||||||
|
@ -0,0 +1,96 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from danswer.configs.constants import DocumentSource
|
||||||
|
from danswer.connectors.models import Document
|
||||||
|
from danswer.connectors.zendesk.connector import ZendeskConnector
|
||||||
|
|
||||||
|
|
||||||
|
def load_test_data(file_name: str = "test_zendesk_data.json") -> dict[str, dict]:
    """Load a JSON fixture stored next to this test module.

    Args:
        file_name: Name of the JSON file, resolved relative to the directory
            that contains this module.

    Returns:
        The parsed JSON content.
    """
    fixture_path = Path(__file__).parent / file_name
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # locale default encoding (fixture titles may contain non-ASCII text).
    with open(fixture_path, encoding="utf-8") as f:
        return json.load(f)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def zendesk_article_connector() -> ZendeskConnector:
    """Provide a ZendeskConnector for "articles" with credentials loaded."""
    article_connector = ZendeskConnector(content_type="articles")
    env_credentials = get_credentials()
    article_connector.load_credentials(env_credentials)
    return article_connector
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def zendesk_ticket_connector() -> ZendeskConnector:
    """Provide a ZendeskConnector for "tickets" with credentials loaded."""
    ticket_connector = ZendeskConnector(content_type="tickets")
    env_credentials = get_credentials()
    ticket_connector.load_credentials(env_credentials)
    return ticket_connector
|
||||||
|
|
||||||
|
|
||||||
|
def get_credentials() -> dict[str, str]:
    """Build the Zendesk credential dict from environment variables.

    Returns:
        A dict with the keys ``zendesk_subdomain``, ``zendesk_email`` and
        ``zendesk_token``, taken from ``ZENDESK_SUBDOMAIN``, ``ZENDESK_EMAIL``
        and ``ZENDESK_TOKEN`` respectively.

    Raises:
        KeyError: If any of the three environment variables is not set.
    """
    env_var_by_credential = (
        ("zendesk_subdomain", "ZENDESK_SUBDOMAIN"),
        ("zendesk_email", "ZENDESK_EMAIL"),
        ("zendesk_token", "ZENDESK_TOKEN"),
    )
    return {
        credential_key: os.environ[env_var]
        for credential_key, env_var in env_var_by_credential
    }
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "connector_fixture", ["zendesk_article_connector", "zendesk_ticket_connector"]
)
def test_zendesk_connector_basic(
    request: pytest.FixtureRequest, connector_fixture: str
) -> None:
    """Poll the connector and check that the known fixture document comes back.

    Runs once per connector fixture. The expected document ID and the
    semantic-identifier check depend on whether the connector handles
    articles or tickets; the expected values come from ``load_test_data()``.
    """
    connector = request.getfixturevalue(connector_fixture)
    test_data = load_test_data()
    is_article_run = connector.content_type == "articles"

    target_test_doc_id: str = (
        f"article:{test_data['article']['id']}"
        if is_article_run
        else f"zendesk_ticket_{test_data['ticket']['id']}"
    )

    # Drain the whole poll window (epoch 0 up to now) before inspecting it.
    all_docs: list[Document] = [
        doc
        for doc_batch in connector.poll_source(0, time.time())
        for doc in doc_batch
    ]

    # No early exit: if several documents share the ID, the last one wins.
    target_doc: Document | None = None
    for candidate in all_docs:
        if candidate.id == target_test_doc_id:
            target_doc = candidate

    assert len(all_docs) > 0, "No documents were retrieved from the connector"
    assert (
        target_doc is not None
    ), "Target document was not found in the retrieved documents"
    assert target_doc.source == DocumentSource.ZENDESK, "Document source is not ZENDESK"

    if is_article_run:
        print(f"target_doc.semantic_identifier {target_doc.semantic_identifier}")
        expected_title = test_data["article"]["semantic_identifier"]
        assert (
            target_doc.semantic_identifier == expected_title
        ), "Article title does not match"
    else:
        expected_prefix = f"Ticket #{test_data['ticket']['id']}"
        assert target_doc.semantic_identifier.startswith(
            expected_prefix
        ), "Ticket ID does not match"
|
||||||
|
|
||||||
|
|
||||||
|
def test_zendesk_connector_slim(zendesk_article_connector: ZendeskConnector) -> None:
    """Check that every fully loaded article ID also appears in the slim listing."""
    # IDs from the full document load
    all_full_doc_ids = {
        doc.id
        for doc_batch in zendesk_article_connector.load_from_state()
        for doc in doc_batch
    }

    # IDs from the slim (ID-only) retrieval
    all_slim_doc_ids = {
        slim_doc.id
        for slim_doc_batch in zendesk_article_connector.retrieve_all_slim_documents()
        for slim_doc in slim_doc_batch
    }

    # The slim listing may contain extra IDs, but must not miss any full doc.
    assert all_full_doc_ids.issubset(
        all_slim_doc_ids
    ), f"Full doc IDs {all_full_doc_ids} not subset of slim doc IDs {all_slim_doc_ids}"
|
@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"article": {
|
||||||
|
"id": "17275801227804",
|
||||||
|
"semantic_identifier": "How can agents leverage knowledge to help customers?"
|
||||||
|
|
||||||
|
},
|
||||||
|
"ticket": {
|
||||||
|
"id": "1"
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user