mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-02 19:19:30 +02:00
Zendesk Connector Metadata and small batch fix (#866)
This commit is contained in:
parent
dca4f7a72b
commit
a122510cee
@ -1,3 +1,5 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
from datetime import timezone
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from zenpy import Zenpy # type: ignore
|
from zenpy import Zenpy # type: ignore
|
||||||
@ -5,14 +7,34 @@ from zenpy.lib.api_objects.help_centre_objects import Article # type: ignore
|
|||||||
|
|
||||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
|
from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic
|
||||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
|
from danswer.connectors.models import BasicExpertInfo
|
||||||
from danswer.connectors.models import Document
|
from danswer.connectors.models import Document
|
||||||
from danswer.connectors.models import Section
|
from danswer.connectors.models import Section
|
||||||
|
|
||||||
|
|
||||||
|
def _article_to_document(article: Article) -> Document:
|
||||||
|
author = BasicExpertInfo(
|
||||||
|
display_name=article.author.name, email=article.author.email
|
||||||
|
)
|
||||||
|
update_time = datetime.fromisoformat(article.updated_at).astimezone(timezone.utc)
|
||||||
|
return Document(
|
||||||
|
id=f"article:{article.id}",
|
||||||
|
sections=[
|
||||||
|
Section(link=article.html_url, text=parse_html_page_basic(article.body))
|
||||||
|
],
|
||||||
|
source=DocumentSource.ZENDESK,
|
||||||
|
semantic_identifier=article.title,
|
||||||
|
doc_updated_at=update_time,
|
||||||
|
primary_owners=[author],
|
||||||
|
metadata={"type": "article"},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ZendeskClientNotSetUpError(PermissionError):
|
class ZendeskClientNotSetUpError(PermissionError):
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
super().__init__("Zendesk Client is not set up, was load_credentials called?")
|
super().__init__("Zendesk Client is not set up, was load_credentials called?")
|
||||||
@ -34,18 +56,6 @@ class ZendeskConnector(LoadConnector, PollConnector):
|
|||||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||||
return self.poll_source(None, None)
|
return self.poll_source(None, None)
|
||||||
|
|
||||||
def _article_to_document(self, article: Article) -> Document:
|
|
||||||
return Document(
|
|
||||||
id=f"article:{article.id}",
|
|
||||||
sections=[Section(link=article.html_url, text=article.body)],
|
|
||||||
source=DocumentSource.ZENDESK,
|
|
||||||
semantic_identifier="Article: " + article.title,
|
|
||||||
metadata={
|
|
||||||
"type": "article",
|
|
||||||
"updated_at": article.updated_at,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
def poll_source(
|
def poll_source(
|
||||||
self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
|
self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
|
||||||
) -> GenerateDocumentsOutput:
|
) -> GenerateDocumentsOutput:
|
||||||
@ -64,7 +74,10 @@ class ZendeskConnector(LoadConnector, PollConnector):
|
|||||||
if article.body is None:
|
if article.body is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
doc_batch.append(self._article_to_document(article))
|
doc_batch.append(_article_to_document(article))
|
||||||
if len(doc_batch) >= self.batch_size:
|
if len(doc_batch) >= self.batch_size:
|
||||||
yield doc_batch
|
yield doc_batch
|
||||||
doc_batch.clear()
|
doc_batch.clear()
|
||||||
|
|
||||||
|
if doc_batch:
|
||||||
|
yield doc_batch
|
||||||
|
Loading…
x
Reference in New Issue
Block a user