mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-10 05:05:34 +02:00
Clean up code
This commit is contained in:
@@ -11,41 +11,41 @@ from danswer.utils.logger import setup_logger
|
|||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
def create_doc_from_ticket(ticket: dict, domain: str) -> Document:
|
||||||
class FreshdeskConnector(PollConnector, LoadConnector):
|
return Document(
|
||||||
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
|
id=ticket["id"],
|
||||||
self.batch_size = batch_size
|
sections=Section(
|
||||||
|
link=f"https://{domain}.freshdesk.com/helpdesk/tickets/{int(ticket['id'])}",
|
||||||
def ticket_link(self, tid: int) -> str:
|
|
||||||
return f"https://{self.domain}.freshdesk.com/helpdesk/tickets/{tid}"
|
|
||||||
|
|
||||||
def build_doc_sections_from_ticket(self, ticket: dict) -> List[Section]:
|
|
||||||
# Use list comprehension for building sections
|
|
||||||
return [
|
|
||||||
Section(
|
|
||||||
link=self.ticket_link(int(ticket["id"])),
|
|
||||||
text=json.dumps({
|
text=json.dumps({
|
||||||
key: value
|
key: value
|
||||||
for key, value in ticket.items()
|
for key, value in ticket.items()
|
||||||
if isinstance(value, str)
|
if isinstance(value, str)
|
||||||
}, default=str),
|
}, default=str),
|
||||||
|
),
|
||||||
|
source=DocumentSource.FRESHDESK,
|
||||||
|
semantic_identifier=ticket["subject"],
|
||||||
|
metadata={
|
||||||
|
key: value.isoformat() if isinstance(value, datetime) else str(value)
|
||||||
|
for key, value in ticket.items()
|
||||||
|
if isinstance(value, (str, datetime)) and key not in ["description", "description_text"]
|
||||||
|
},
|
||||||
)
|
)
|
||||||
]
|
|
||||||
|
|
||||||
def strip_html_tags(self, html: str) -> str:
|
class FreshdeskConnector(PollConnector, LoadConnector):
|
||||||
return parse_html_page_basic(html)
|
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
|
||||||
|
self.batch_size = batch_size
|
||||||
|
|
||||||
def load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]:
|
def _load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]:
|
||||||
self.api_key = credentials.get("freshdesk_api_key")
|
self.api_key = credentials.get("freshdesk_api_key")
|
||||||
self.domain = credentials.get("freshdesk_domain")
|
self.domain = credentials.get("freshdesk_domain")
|
||||||
self.password = credentials.get("freshdesk_password")
|
self.password = credentials.get("freshdesk_password")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _fetch_tickets(self, start: datetime, end: datetime) -> List[dict]:
|
def _fetch_tickets(self, start: datetime | None = None, end: datetime | None = None) -> List[dict]:
|
||||||
if any([self.api_key, self.domain, self.password]) is None:
|
if any([self.api_key, self.domain, self.password]) is None:
|
||||||
raise ConnectorMissingCredentialError("freshdesk")
|
raise ConnectorMissingCredentialError("freshdesk")
|
||||||
|
|
||||||
start_time = start.strftime("%Y-%m-%dT%H:%M:%SZ")
|
start_time = start.isoformat() if start else None
|
||||||
|
|
||||||
all_tickets = []
|
all_tickets = []
|
||||||
page = 1
|
page = 1
|
||||||
@@ -54,7 +54,7 @@ class FreshdeskConnector(PollConnector, LoadConnector):
|
|||||||
while True:
|
while True:
|
||||||
freshdesk_url = (
|
freshdesk_url = (
|
||||||
f"https://{self.domain}.freshdesk.com/api/v2/tickets"
|
f"https://{self.domain}.freshdesk.com/api/v2/tickets"
|
||||||
f"?include=description&updated_since={start_time}"
|
f"?include=description{f"&updated_since={start_time}" if start_time else ""}"
|
||||||
f"&per_page={per_page}&page={page}"
|
f"&per_page={per_page}&page={page}"
|
||||||
)
|
)
|
||||||
response = requests.get(freshdesk_url, auth=(self.api_key, self.password))
|
response = requests.get(freshdesk_url, auth=(self.api_key, self.password))
|
||||||
@@ -74,20 +74,14 @@ class FreshdeskConnector(PollConnector, LoadConnector):
|
|||||||
|
|
||||||
return all_tickets
|
return all_tickets
|
||||||
|
|
||||||
def _process_tickets(self, start: datetime, end: datetime) -> GenerateDocumentsOutput:
|
def _process_tickets(self, start: datetime | None = None, end: datetime | None = None) -> GenerateDocumentsOutput:
|
||||||
# Ensure start and end are in UTC
|
|
||||||
start = start.astimezone(timezone.utc)
|
|
||||||
end = end.astimezone(timezone.utc)
|
|
||||||
|
|
||||||
tickets = self._fetch_tickets(start, end)
|
tickets = self._fetch_tickets(start, end)
|
||||||
|
|
||||||
doc_batch: List[Document] = []
|
doc_batch: List[Document] = []
|
||||||
|
|
||||||
for ticket in tickets:
|
for ticket in tickets:
|
||||||
# Convert date fields to UTC
|
|
||||||
for date_field in ["created_at", "updated_at", "due_by"]:
|
for date_field in ["created_at", "updated_at", "due_by"]:
|
||||||
if ticket[date_field].endswith('Z'):
|
ticket[date_field] = datetime.fromisoformat(ticket[date_field].rstrip('Z'))
|
||||||
ticket[date_field] = ticket[date_field][:-1] + '+00:00'
|
|
||||||
ticket[date_field] = datetime.fromisoformat(ticket[date_field]).replace(tzinfo=timezone.utc)
|
|
||||||
|
|
||||||
# Convert all other values to strings
|
# Convert all other values to strings
|
||||||
ticket = {
|
ticket = {
|
||||||
@@ -104,28 +98,12 @@ class FreshdeskConnector(PollConnector, LoadConnector):
|
|||||||
ticket["status"] = status_mapping.get(ticket["status"], str(ticket["status"]))
|
ticket["status"] = status_mapping.get(ticket["status"], str(ticket["status"]))
|
||||||
|
|
||||||
# Stripping HTML tags from the description field
|
# Stripping HTML tags from the description field
|
||||||
ticket["description"] = self.strip_html_tags(ticket["description"])
|
ticket["description"] = parse_html_page_basic(ticket["description"])
|
||||||
|
|
||||||
# Remove extra white spaces from the description field
|
# Remove extra white spaces from the description field
|
||||||
ticket["description"] = " ".join(ticket["description"].split())
|
ticket["description"] = " ".join(ticket["description"].split())
|
||||||
|
|
||||||
# Use list comprehension for building sections
|
doc_batch.append(create_doc_from_ticket(ticket, self.domain))
|
||||||
sections = self.build_doc_sections_from_ticket(ticket)
|
|
||||||
|
|
||||||
created_at = ticket["created_at"]
|
|
||||||
if start <= created_at <= end:
|
|
||||||
doc = Document(
|
|
||||||
id=ticket["id"],
|
|
||||||
sections=sections,
|
|
||||||
source=DocumentSource.FRESHDESK,
|
|
||||||
semantic_identifier=ticket["subject"],
|
|
||||||
metadata={
|
|
||||||
key: value.isoformat() if isinstance(value, datetime) else str(value)
|
|
||||||
for key, value in ticket.items()
|
|
||||||
if isinstance(value, (str, datetime)) and key not in ["description", "description_text"]
|
|
||||||
},
|
|
||||||
)
|
|
||||||
doc_batch.append(doc)
|
|
||||||
|
|
||||||
if len(doc_batch) >= self.batch_size:
|
if len(doc_batch) >= self.batch_size:
|
||||||
yield doc_batch
|
yield doc_batch
|
||||||
@@ -135,7 +113,7 @@ class FreshdeskConnector(PollConnector, LoadConnector):
|
|||||||
yield doc_batch
|
yield doc_batch
|
||||||
|
|
||||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||||
return self._fetch_tickets()
|
return self._process_tickets()
|
||||||
|
|
||||||
def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput:
|
def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput:
|
||||||
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
|
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
|
||||||
|
Reference in New Issue
Block a user