mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-07-07 13:10:24 +02:00
Guru and Productboard Time Updated (#683)
This commit is contained in:
@ -0,0 +1,16 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
from datetime import timezone
|
||||||
|
|
||||||
|
from dateutil.parser import parse
|
||||||
|
|
||||||
|
|
||||||
|
def datetime_to_utc(dt: datetime) -> datetime:
    """Return ``dt`` as a timezone-aware datetime expressed in UTC.

    A naive datetime (no ``tzinfo``, or a ``tzinfo`` whose ``utcoffset`` is
    ``None``) is taken to already be in UTC: it is tagged with
    ``timezone.utc`` and its clock fields are left unchanged. An aware
    datetime is converted to the same instant expressed in UTC.
    """
    if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None:
        # Tag naive values explicitly; calling astimezone() on a naive
        # datetime would interpret it in the system's local timezone instead.
        dt = dt.replace(tzinfo=timezone.utc)

    return dt.astimezone(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
def time_str_to_utc(datetime_str: str) -> datetime:
    """Parse a date/time string and return it as an aware UTC datetime.

    The string is parsed with ``dateutil.parser.parse`` and the result is
    normalized through ``datetime_to_utc``. A string that carries no
    timezone information is therefore treated as already being in UTC.
    """
    return datetime_to_utc(parse(datetime_str))
|
@ -190,8 +190,8 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
|
|
||||||
current = time.time()
|
current = time.time()
|
||||||
one_day_ago = current - 24 * 60 * 60 * 360 # 1 year
|
one_year_ago = current - 24 * 60 * 60 * 360
|
||||||
latest_docs = document360_connector.poll_source(one_day_ago, current)
|
latest_docs = document360_connector.poll_source(one_year_ago, current)
|
||||||
|
|
||||||
for doc in latest_docs:
|
for doc in latest_docs:
|
||||||
print(doc)
|
print(doc)
|
||||||
|
@ -292,7 +292,6 @@ class GongConnector(LoadConnector, PollConnector):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import os
|
import os
|
||||||
import time
|
|
||||||
|
|
||||||
connector = GongConnector()
|
connector = GongConnector()
|
||||||
connector.load_credentials(
|
connector.load_credentials(
|
||||||
@ -302,6 +301,5 @@ if __name__ == "__main__":
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
current = time.time()
|
|
||||||
latest_docs = connector.load_from_state()
|
latest_docs = connector.load_from_state()
|
||||||
print(next(latest_docs))
|
print(next(latest_docs))
|
||||||
|
@ -8,6 +8,7 @@ import requests
|
|||||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic
|
from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic
|
||||||
|
from danswer.connectors.cross_connector_utils.time_utils import time_str_to_utc
|
||||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
@ -77,13 +78,25 @@ class GuruConnector(LoadConnector, PollConnector):
|
|||||||
title = card["preferredPhrase"]
|
title = card["preferredPhrase"]
|
||||||
link = GURU_CARDS_URL + card["slug"]
|
link = GURU_CARDS_URL + card["slug"]
|
||||||
content_text = title + "\n" + parse_html_page_basic(card["content"])
|
content_text = title + "\n" + parse_html_page_basic(card["content"])
|
||||||
|
last_updated = time_str_to_utc(card["lastModified"])
|
||||||
|
last_verified = (
|
||||||
|
time_str_to_utc(card.get("lastVerified"))
|
||||||
|
if card.get("lastVerified")
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
|
# For Danswer, we decay document score overtime, either last_updated or
|
||||||
|
# last_verified is a good enough signal for the document's recency
|
||||||
|
latest_time = (
|
||||||
|
max(last_verified, last_updated) if last_verified else last_updated
|
||||||
|
)
|
||||||
doc_batch.append(
|
doc_batch.append(
|
||||||
Document(
|
Document(
|
||||||
id=card["id"],
|
id=card["id"],
|
||||||
sections=[Section(link=link, text=content_text)],
|
sections=[Section(link=link, text=content_text)],
|
||||||
source=DocumentSource.GURU,
|
source=DocumentSource.GURU,
|
||||||
semantic_identifier=title,
|
semantic_identifier=title,
|
||||||
|
doc_updated_at=latest_time,
|
||||||
metadata={},
|
metadata={},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -109,3 +122,18 @@ class GuruConnector(LoadConnector, PollConnector):
|
|||||||
end_time = unixtime_to_guru_time_str(end)
|
end_time = unixtime_to_guru_time_str(end)
|
||||||
|
|
||||||
return self._process_cards(start_time, end_time)
|
return self._process_cards(start_time, end_time)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: needs GURU_USER and GURU_USER_TOKEN in the
    # environment and prints the first item yielded by a full load.
    import os

    connector = GuruConnector()
    connector.load_credentials(
        {
            "guru_user": os.environ["GURU_USER"],
            "guru_user_token": os.environ["GURU_USER_TOKEN"],
        }
    )

    latest_docs = connector.load_from_state()
    print(next(latest_docs))
|
||||||
|
@ -10,6 +10,7 @@ from retry import retry
|
|||||||
|
|
||||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
|
from danswer.connectors.cross_connector_utils.time_utils import time_str_to_utc
|
||||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
@ -108,11 +109,11 @@ class ProductboardConnector(PollConnector):
|
|||||||
],
|
],
|
||||||
semantic_identifier=feature["name"],
|
semantic_identifier=feature["name"],
|
||||||
source=DocumentSource.PRODUCTBOARD,
|
source=DocumentSource.PRODUCTBOARD,
|
||||||
|
doc_updated_at=time_str_to_utc(feature["updatedAt"]),
|
||||||
metadata={
|
metadata={
|
||||||
"productboard_entity_type": feature["type"],
|
"productboard_entity_type": feature["type"],
|
||||||
"status": feature["status"]["name"],
|
"status": feature["status"]["name"],
|
||||||
"owner": self._get_owner_email(feature),
|
"owner": self._get_owner_email(feature),
|
||||||
"updated_at": feature["updatedAt"],
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -136,10 +137,10 @@ class ProductboardConnector(PollConnector):
|
|||||||
],
|
],
|
||||||
semantic_identifier=component["name"],
|
semantic_identifier=component["name"],
|
||||||
source=DocumentSource.PRODUCTBOARD,
|
source=DocumentSource.PRODUCTBOARD,
|
||||||
|
doc_updated_at=time_str_to_utc(component["updatedAt"]),
|
||||||
metadata={
|
metadata={
|
||||||
"productboard_entity_type": "component",
|
"productboard_entity_type": "component",
|
||||||
"owner": self._get_owner_email(component),
|
"owner": self._get_owner_email(component),
|
||||||
"updated_at": component["updatedAt"],
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -164,10 +165,10 @@ class ProductboardConnector(PollConnector):
|
|||||||
],
|
],
|
||||||
semantic_identifier=product["name"],
|
semantic_identifier=product["name"],
|
||||||
source=DocumentSource.PRODUCTBOARD,
|
source=DocumentSource.PRODUCTBOARD,
|
||||||
|
doc_updated_at=time_str_to_utc(product["updatedAt"]),
|
||||||
metadata={
|
metadata={
|
||||||
"productboard_entity_type": "product",
|
"productboard_entity_type": "product",
|
||||||
"owner": self._get_owner_email(product),
|
"owner": self._get_owner_email(product),
|
||||||
"updated_at": product["updatedAt"],
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -190,11 +191,11 @@ class ProductboardConnector(PollConnector):
|
|||||||
],
|
],
|
||||||
semantic_identifier=objective["name"],
|
semantic_identifier=objective["name"],
|
||||||
source=DocumentSource.PRODUCTBOARD,
|
source=DocumentSource.PRODUCTBOARD,
|
||||||
|
doc_updated_at=time_str_to_utc(objective["updatedAt"]),
|
||||||
metadata={
|
metadata={
|
||||||
"productboard_entity_type": "release",
|
"productboard_entity_type": "release",
|
||||||
"state": objective["state"],
|
"state": objective["state"],
|
||||||
"owner": self._get_owner_email(objective),
|
"owner": self._get_owner_email(objective),
|
||||||
"updated_at": objective["updatedAt"],
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -252,3 +253,20 @@ class ProductboardConnector(PollConnector):
|
|||||||
|
|
||||||
if document_batch:
|
if document_batch:
|
||||||
yield document_batch
|
yield document_batch
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: needs PRODUCTBOARD_ACCESS_TOKEN in the environment
    # and prints the first item yielded when polling the past year.
    import os
    import time

    connector = ProductboardConnector()
    connector.load_credentials(
        {
            "productboard_access_token": os.environ["PRODUCTBOARD_ACCESS_TOKEN"],
        }
    )

    current = time.time()
    # 365 days, so the window actually matches the variable name
    # (the previous multiplier of 360 fell five days short of a year).
    one_year_ago = current - 24 * 60 * 60 * 365
    latest_docs = connector.poll_source(one_year_ago, current)
    print(next(latest_docs))
|
||||||
|
Reference in New Issue
Block a user