mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-24 23:11:01 +02:00
Merge pull request #3032 from danswer-ai/freshdesk-cleanup
Cleaned up connector
This commit is contained in:
commit
5d9b8364ab
@ -20,76 +20,90 @@ from danswer.utils.logger import setup_logger
|
|||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
_FRESHDESK_ID_PREFIX = "FRESHDESK_"
|
||||||
|
|
||||||
|
|
||||||
|
_TICKET_FIELDS_TO_INCLUDE = {
|
||||||
|
"fr_escalated",
|
||||||
|
"spam",
|
||||||
|
"priority",
|
||||||
|
"source",
|
||||||
|
"status",
|
||||||
|
"type",
|
||||||
|
"is_escalated",
|
||||||
|
"tags",
|
||||||
|
"nr_due_by",
|
||||||
|
"nr_escalated",
|
||||||
|
"cc_emails",
|
||||||
|
"fwd_emails",
|
||||||
|
"reply_cc_emails",
|
||||||
|
"ticket_cc_emails",
|
||||||
|
"support_email",
|
||||||
|
"to_emails",
|
||||||
|
}
|
||||||
|
|
||||||
|
_SOURCE_NUMBER_TYPE_MAP = {
|
||||||
|
"1": "Email",
|
||||||
|
"2": "Portal",
|
||||||
|
"3": "Phone",
|
||||||
|
"7": "Chat",
|
||||||
|
"9": "Feedback Widget",
|
||||||
|
"10": "Outbound Email",
|
||||||
|
}
|
||||||
|
|
||||||
|
_PRIORITY_NUMBER_TYPE_MAP = {"1": "low", "2": "medium", "3": "high", "4": "urgent"}
|
||||||
|
|
||||||
|
_STATUS_NUMBER_TYPE_MAP = {"2": "open", "3": "pending", "4": "resolved", "5": "closed"}
|
||||||
|
|
||||||
|
|
||||||
def _create_metadata_from_ticket(ticket: dict) -> dict:
|
def _create_metadata_from_ticket(ticket: dict) -> dict:
|
||||||
included_fields = {
|
metadata: dict[str, str | list[str]] = {}
|
||||||
"fr_escalated",
|
# Combine all emails into a list so there are no repeated emails
|
||||||
"spam",
|
email_data: set[str] = set()
|
||||||
"priority",
|
|
||||||
"source",
|
|
||||||
"status",
|
|
||||||
"type",
|
|
||||||
"is_escalated",
|
|
||||||
"tags",
|
|
||||||
"nr_due_by",
|
|
||||||
"nr_escalated",
|
|
||||||
"cc_emails",
|
|
||||||
"fwd_emails",
|
|
||||||
"reply_cc_emails",
|
|
||||||
"ticket_cc_emails",
|
|
||||||
"support_email",
|
|
||||||
"to_emails",
|
|
||||||
}
|
|
||||||
|
|
||||||
metadata = {}
|
|
||||||
email_data = {}
|
|
||||||
|
|
||||||
for key, value in ticket.items():
|
for key, value in ticket.items():
|
||||||
if (
|
# Skip fields that aren't useful for embedding
|
||||||
key in included_fields
|
if key not in _TICKET_FIELDS_TO_INCLUDE:
|
||||||
and value is not None
|
continue
|
||||||
and value != []
|
|
||||||
and value != {}
|
# Skip empty fields
|
||||||
and value != "[]"
|
if not value or value == "[]":
|
||||||
and value != ""
|
continue
|
||||||
):
|
|
||||||
value_to_str = (
|
# Convert strings or lists to strings
|
||||||
[str(item) for item in value] if isinstance(value, List) else str(value)
|
stringified_value: str | list[str]
|
||||||
)
|
if isinstance(value, list):
|
||||||
if "email" in key:
|
stringified_value = [str(item) for item in value]
|
||||||
email_data[key] = value_to_str
|
else:
|
||||||
|
stringified_value = str(value)
|
||||||
|
|
||||||
|
if "email" in key:
|
||||||
|
if isinstance(stringified_value, list):
|
||||||
|
email_data.update(stringified_value)
|
||||||
else:
|
else:
|
||||||
metadata[key] = value_to_str
|
email_data.add(stringified_value)
|
||||||
|
else:
|
||||||
|
metadata[key] = stringified_value
|
||||||
|
|
||||||
if email_data:
|
if email_data:
|
||||||
metadata["email_data"] = str(email_data)
|
metadata["emails"] = list(email_data)
|
||||||
|
|
||||||
# Convert source to human-parsable string
|
# Convert source numbers to human-parsable string
|
||||||
source_types = {
|
if source_number := ticket.get("source"):
|
||||||
"1": "Email",
|
metadata["source"] = _SOURCE_NUMBER_TYPE_MAP.get(
|
||||||
"2": "Portal",
|
str(source_number), "Unknown Source Type"
|
||||||
"3": "Phone",
|
|
||||||
"7": "Chat",
|
|
||||||
"9": "Feedback Widget",
|
|
||||||
"10": "Outbound Email",
|
|
||||||
}
|
|
||||||
if ticket.get("source"):
|
|
||||||
metadata["source"] = source_types.get(
|
|
||||||
str(ticket.get("source")), "Unknown Source Type"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert priority to human-parsable string
|
# Convert priority numbers to human-parsable string
|
||||||
priority_types = {"1": "low", "2": "medium", "3": "high", "4": "urgent"}
|
if priority_number := ticket.get("priority"):
|
||||||
if ticket.get("priority"):
|
metadata["priority"] = _PRIORITY_NUMBER_TYPE_MAP.get(
|
||||||
metadata["priority"] = priority_types.get(
|
priority_number, "Unknown Priority"
|
||||||
str(ticket.get("priority")), "Unknown Priority"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert status to human-parsable string
|
# Convert status to human-parsable string
|
||||||
status_types = {"2": "open", "3": "pending", "4": "resolved", "5": "closed"}
|
if status_number := ticket.get("status"):
|
||||||
if ticket.get("status"):
|
metadata["status"] = _STATUS_NUMBER_TYPE_MAP.get(
|
||||||
metadata["status"] = status_types.get(
|
str(status_number), "Unknown Status"
|
||||||
str(ticket.get("status")), "Unknown Status"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
due_by = datetime.fromisoformat(ticket["due_by"].replace("Z", "+00:00"))
|
due_by = datetime.fromisoformat(ticket["due_by"].replace("Z", "+00:00"))
|
||||||
@ -99,17 +113,24 @@ def _create_metadata_from_ticket(ticket: dict) -> dict:
|
|||||||
|
|
||||||
|
|
||||||
def _create_doc_from_ticket(ticket: dict, domain: str) -> Document:
|
def _create_doc_from_ticket(ticket: dict, domain: str) -> Document:
|
||||||
|
# Use the ticket description as the text
|
||||||
|
text = f"Ticket description: {parse_html_page_basic(ticket.get('description_text', ''))}"
|
||||||
|
metadata = _create_metadata_from_ticket(ticket)
|
||||||
|
|
||||||
|
# This is also used in the ID because it is more unique than just the ticket ID
|
||||||
|
link = f"https://{domain}.freshdesk.com/helpdesk/tickets/{ticket['id']}"
|
||||||
|
|
||||||
return Document(
|
return Document(
|
||||||
id=str(ticket["id"]),
|
id=_FRESHDESK_ID_PREFIX + link,
|
||||||
sections=[
|
sections=[
|
||||||
Section(
|
Section(
|
||||||
link=f"https://{domain}.freshdesk.com/helpdesk/tickets/{int(ticket['id'])}",
|
link=link,
|
||||||
text=f"description: {parse_html_page_basic(ticket.get('description_text', ''))}",
|
text=text,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
source=DocumentSource.FRESHDESK,
|
source=DocumentSource.FRESHDESK,
|
||||||
semantic_identifier=ticket["subject"],
|
semantic_identifier=ticket["subject"],
|
||||||
metadata=_create_metadata_from_ticket(ticket),
|
metadata=metadata,
|
||||||
doc_updated_at=datetime.fromisoformat(
|
doc_updated_at=datetime.fromisoformat(
|
||||||
ticket["updated_at"].replace("Z", "+00:00")
|
ticket["updated_at"].replace("Z", "+00:00")
|
||||||
),
|
),
|
||||||
@ -146,7 +167,7 @@ class FreshdeskConnector(PollConnector, LoadConnector):
|
|||||||
'include' field available for this endpoint:
|
'include' field available for this endpoint:
|
||||||
https://developers.freshdesk.com/api/#filter_tickets
|
https://developers.freshdesk.com/api/#filter_tickets
|
||||||
"""
|
"""
|
||||||
if any(attr is None for attr in [self.api_key, self.domain, self.password]):
|
if self.api_key is None or self.domain is None or self.password is None:
|
||||||
raise ConnectorMissingCredentialError("freshdesk")
|
raise ConnectorMissingCredentialError("freshdesk")
|
||||||
|
|
||||||
base_url = f"https://{self.domain}.freshdesk.com/api/v2/tickets"
|
base_url = f"https://{self.domain}.freshdesk.com/api/v2/tickets"
|
||||||
@ -187,7 +208,6 @@ class FreshdeskConnector(PollConnector, LoadConnector):
|
|||||||
|
|
||||||
for ticket_batch in self._fetch_tickets(start, end):
|
for ticket_batch in self._fetch_tickets(start, end):
|
||||||
for ticket in ticket_batch:
|
for ticket in ticket_batch:
|
||||||
logger.info(_create_doc_from_ticket(ticket, self.domain))
|
|
||||||
doc_batch.append(_create_doc_from_ticket(ticket, self.domain))
|
doc_batch.append(_create_doc_from_ticket(ticket, self.domain))
|
||||||
|
|
||||||
if len(doc_batch) >= self.batch_size:
|
if len(doc_batch) >= self.batch_size:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user