mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-26 17:51:54 +01:00
Zendesk Connector Metadata and small batch fix (#866)
This commit is contained in:
parent
dca4f7a72b
commit
a122510cee
@ -1,3 +1,5 @@
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
from zenpy import Zenpy # type: ignore
|
||||
@ -5,14 +7,34 @@ from zenpy.lib.api_objects.help_centre_objects import Article # type: ignore
|
||||
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
from danswer.connectors.interfaces import LoadConnector
|
||||
from danswer.connectors.interfaces import PollConnector
|
||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from danswer.connectors.models import BasicExpertInfo
|
||||
from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import Section
|
||||
|
||||
|
||||
def _article_to_document(article: Article) -> Document:
    """Convert a Zendesk help-centre article into a Document.

    Args:
        article: Article whose ``id``, ``title``, ``body``, ``html_url``,
            ``updated_at`` and ``author`` (``name``, ``email``) attributes
            are read.

    Returns:
        A Document with one Section (the article body passed through
        ``parse_html_page_basic``), the author as the only primary owner and
        ``doc_updated_at`` expressed in UTC.

    Raises:
        ValueError: If ``article.updated_at`` is not a parseable ISO 8601
            timestamp.
    """
    author = BasicExpertInfo(
        display_name=article.author.name, email=article.author.email
    )

    raw_updated_at = article.updated_at
    # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11
    # onwards; spelling it as an explicit offset parses on older versions too
    # and yields the same value on newer ones.
    if isinstance(raw_updated_at, str) and raw_updated_at.endswith("Z"):
        raw_updated_at = raw_updated_at[:-1] + "+00:00"
    parsed = datetime.fromisoformat(raw_updated_at)

    if parsed.tzinfo is None:
        # astimezone() would interpret a naive value in the host's local
        # zone, making the result depend on where the code runs.
        # NOTE(review): treats offset-less timestamps as UTC, on the
        # assumption that Zendesk reports times in UTC - confirm.
        update_time = parsed.replace(tzinfo=timezone.utc)
    else:
        update_time = parsed.astimezone(timezone.utc)

    return Document(
        id=f"article:{article.id}",
        sections=[
            Section(link=article.html_url, text=parse_html_page_basic(article.body))
        ],
        source=DocumentSource.ZENDESK,
        semantic_identifier=article.title,
        doc_updated_at=update_time,
        primary_owners=[author],
        metadata={"type": "article"},
    )
|
||||
|
||||
|
||||
class ZendeskClientNotSetUpError(PermissionError):
    """Error carrying a fixed message that the Zendesk client is not set up."""

    def __init__(self) -> None:
        # The message is fixed; callers construct this error without arguments.
        message = "Zendesk Client is not set up, was load_credentials called?"
        super().__init__(message)
|
||||
@ -34,18 +56,6 @@ class ZendeskConnector(LoadConnector, PollConnector):
|
||||
def load_from_state(self) -> GenerateDocumentsOutput:
    """Return the result of ``poll_source`` called with start and end both None."""
    window_start = None
    window_end = None
    return self.poll_source(window_start, window_end)
|
||||
|
||||
def _article_to_document(self, article: Article) -> Document:
    """Build a Zendesk Document from an article.

    The document id is the article id prefixed with ``article:``, the single
    section carries the article's URL and unmodified body, and the metadata
    records the document type together with the article's ``updated_at``.
    """
    document_id = f"article:{article.id}"
    body_section = Section(link=article.html_url, text=article.body)
    display_title = "Article: " + article.title
    article_metadata = {
        "type": "article",
        "updated_at": article.updated_at,
    }
    return Document(
        id=document_id,
        sections=[body_section],
        source=DocumentSource.ZENDESK,
        semantic_identifier=display_title,
        metadata=article_metadata,
    )
|
||||
|
||||
def poll_source(
|
||||
self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
|
||||
) -> GenerateDocumentsOutput:
|
||||
@ -64,7 +74,10 @@ class ZendeskConnector(LoadConnector, PollConnector):
|
||||
if article.body is None:
|
||||
continue
|
||||
|
||||
doc_batch.append(self._article_to_document(article))
|
||||
doc_batch.append(_article_to_document(article))
|
||||
if len(doc_batch) >= self.batch_size:
|
||||
yield doc_batch
|
||||
doc_batch.clear()
|
||||
|
||||
if doc_batch:
|
||||
yield doc_batch
|
||||
|
Loading…
x
Reference in New Issue
Block a user