From 1b55e617ad04ab865bad41432e02b93bb8abee3a Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 11 Feb 2024 17:08:43 -0800 Subject: [PATCH] Offset Github by 3 hours to not lose updates (#1073) --- backend/danswer/connectors/github/connector.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/danswer/connectors/github/connector.py b/backend/danswer/connectors/github/connector.py index 6ffaa60d2d..89e5de551f 100644 --- a/backend/danswer/connectors/github/connector.py +++ b/backend/danswer/connectors/github/connector.py @@ -214,7 +214,17 @@ class GithubConnector(LoadConnector, PollConnector): ) -> GenerateDocumentsOutput: start_datetime = datetime.utcfromtimestamp(start) end_datetime = datetime.utcfromtimestamp(end) - return self._fetch_from_github(start_datetime, end_datetime) + + # Move start time back by 3 hours, since some Issues/PRs are getting dropped + # Could be due to delayed processing on GitHub side + # The non-updated issues since last poll will be shortcut-ed and not embedded + adjusted_start_datetime = start_datetime - timedelta(hours=3) + + epoch = datetime.utcfromtimestamp(0) + if adjusted_start_datetime < epoch: + adjusted_start_datetime = epoch + + return self._fetch_from_github(adjusted_start_datetime, end_datetime) if __name__ == "__main__":