Guru and Productboard Time Updated (#683)

This commit is contained in:
Yuhong Sun
2023-11-02 14:27:06 -07:00
committed by GitHub
parent 80eedebe86
commit b0f76b97ef
5 changed files with 68 additions and 8 deletions

View File

@ -0,0 +1,16 @@
from datetime import datetime
from datetime import timezone
from dateutil.parser import parse
def datetime_to_utc(dt: datetime) -> datetime:
    """Return *dt* as a timezone-aware datetime expressed in UTC.

    A naive datetime (no ``tzinfo``, or a ``tzinfo`` that reports no UTC
    offset) is treated as already being in UTC and is simply tagged as such.
    An aware datetime is converted to the equivalent UTC instant.
    """
    if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None:
        # Without this, astimezone() would interpret a naive value as
        # system-local time, making the result depend on the host's timezone.
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)
def time_str_to_utc(datetime_str: str) -> datetime:
    """Parse *datetime_str* and return it as a timezone-aware UTC datetime.

    Parsing is delegated to ``dateutil.parser.parse``; the parsed value is
    then normalized through ``datetime_to_utc``.
    """
    dt = parse(datetime_str)
    return datetime_to_utc(dt)

View File

@ -190,8 +190,8 @@ if __name__ == "__main__":
)
current = time.time()
one_day_ago = current - 24 * 60 * 60 * 360 # 1 year
latest_docs = document360_connector.poll_source(one_day_ago, current)
one_year_ago = current - 24 * 60 * 60 * 360
latest_docs = document360_connector.poll_source(one_year_ago, current)
for doc in latest_docs:
print(doc)

View File

@ -292,7 +292,6 @@ class GongConnector(LoadConnector, PollConnector):
if __name__ == "__main__":
import os
import time
connector = GongConnector()
connector.load_credentials(
@ -302,6 +301,5 @@ if __name__ == "__main__":
}
)
current = time.time()
latest_docs = connector.load_from_state()
print(next(latest_docs))

View File

@ -8,6 +8,7 @@ import requests
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic
from danswer.connectors.cross_connector_utils.time_utils import time_str_to_utc
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
@ -77,13 +78,25 @@ class GuruConnector(LoadConnector, PollConnector):
title = card["preferredPhrase"]
link = GURU_CARDS_URL + card["slug"]
content_text = title + "\n" + parse_html_page_basic(card["content"])
last_updated = time_str_to_utc(card["lastModified"])
last_verified = (
time_str_to_utc(card.get("lastVerified"))
if card.get("lastVerified")
else None
)
# For Danswer, we decay document scores over time; either last_updated or
# last_verified is a good enough signal for the document's recency
latest_time = (
max(last_verified, last_updated) if last_verified else last_updated
)
doc_batch.append(
Document(
id=card["id"],
sections=[Section(link=link, text=content_text)],
source=DocumentSource.GURU,
semantic_identifier=title,
doc_updated_at=latest_time,
metadata={},
)
)
@ -109,3 +122,18 @@ class GuruConnector(LoadConnector, PollConnector):
end_time = unixtime_to_guru_time_str(end)
return self._process_cards(start_time, end_time)
if __name__ == "__main__":
    # Manual smoke test: load credentials from the environment and print the
    # first item produced by a full load.
    import os

    connector = GuruConnector()
    connector.load_credentials(
        {
            # os.environ[...] raises KeyError if the variable is not set.
            "guru_user": os.environ["GURU_USER"],
            "guru_user_token": os.environ["GURU_USER_TOKEN"],
        }
    )

    latest_docs = connector.load_from_state()
    # NOTE(review): next() raises StopIteration if nothing is yielded —
    # presumably acceptable for a debugging entry point; confirm.
    print(next(latest_docs))

View File

@ -10,6 +10,7 @@ from retry import retry
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.time_utils import time_str_to_utc
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
@ -108,11 +109,11 @@ class ProductboardConnector(PollConnector):
],
semantic_identifier=feature["name"],
source=DocumentSource.PRODUCTBOARD,
doc_updated_at=time_str_to_utc(feature["updatedAt"]),
metadata={
"productboard_entity_type": feature["type"],
"status": feature["status"]["name"],
"owner": self._get_owner_email(feature),
"updated_at": feature["updatedAt"],
},
)
@ -136,10 +137,10 @@ class ProductboardConnector(PollConnector):
],
semantic_identifier=component["name"],
source=DocumentSource.PRODUCTBOARD,
doc_updated_at=time_str_to_utc(component["updatedAt"]),
metadata={
"productboard_entity_type": "component",
"owner": self._get_owner_email(component),
"updated_at": component["updatedAt"],
},
)
@ -164,10 +165,10 @@ class ProductboardConnector(PollConnector):
],
semantic_identifier=product["name"],
source=DocumentSource.PRODUCTBOARD,
doc_updated_at=time_str_to_utc(product["updatedAt"]),
metadata={
"productboard_entity_type": "product",
"owner": self._get_owner_email(product),
"updated_at": product["updatedAt"],
},
)
@ -190,11 +191,11 @@ class ProductboardConnector(PollConnector):
],
semantic_identifier=objective["name"],
source=DocumentSource.PRODUCTBOARD,
doc_updated_at=time_str_to_utc(objective["updatedAt"]),
metadata={
"productboard_entity_type": "release",
"state": objective["state"],
"owner": self._get_owner_email(objective),
"updated_at": objective["updatedAt"],
},
)
@ -252,3 +253,20 @@ class ProductboardConnector(PollConnector):
if document_batch:
yield document_batch
if __name__ == "__main__":
    # Manual smoke test: poll the last year of updates and print the first
    # item produced.
    import os
    import time

    connector = ProductboardConnector()
    connector.load_credentials(
        {
            # os.environ[...] raises KeyError if the variable is not set.
            "productboard_access_token": os.environ["PRODUCTBOARD_ACCESS_TOKEN"],
        }
    )

    current = time.time()
    # 365 days in seconds, so the window matches the variable's name.
    one_year_ago = current - 365 * 24 * 60 * 60
    latest_docs = connector.poll_source(one_year_ago, current)
    # NOTE(review): next() raises StopIteration if nothing is yielded —
    # presumably acceptable for a debugging entry point; confirm.
    print(next(latest_docs))