From 5edc464c9acc600478a1bba5875f3e356ba21420 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Thu, 18 Jan 2024 16:12:46 -0800 Subject: [PATCH] Fix GitLabs CI (#965) --- .../danswer/connectors/gitlab/connector.py | 110 +++++++++--------- web/src/app/admin/connectors/gitlab/page.tsx | 5 +- .../admin/connectors/ConnectorTitle.tsx | 2 +- web/src/components/icons/icons.tsx | 1 - web/src/lib/sources.ts | 8 +- web/src/lib/types.ts | 3 +- 6 files changed, 62 insertions(+), 67 deletions(-) diff --git a/backend/danswer/connectors/gitlab/connector.py b/backend/danswer/connectors/gitlab/connector.py index ef2181d0f..665e3cf55 100644 --- a/backend/danswer/connectors/gitlab/connector.py +++ b/backend/danswer/connectors/gitlab/connector.py @@ -1,18 +1,20 @@ - import itertools +from collections.abc import Iterable from collections.abc import Iterator from datetime import datetime from datetime import timezone from typing import Any import gitlab + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch -from danswer.connectors.models import BasicExpertInfo, ConnectorMissingCredentialError +from danswer.connectors.models import BasicExpertInfo +from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.utils.logger import setup_logger @@ -22,22 +24,21 @@ logger = setup_logger() def _batch_gitlab_objects( - git_objs: list[Any], - batch_size: int + git_objs: Iterable[Any], batch_size: int ) -> Iterator[list[Any]]: it = iter(git_objs) while True: - batch = list(itertools.islice(it, batch_size[0])) + batch = list(itertools.islice(it, batch_size)) if not batch: break yield batch -def get_author(author:Any)-> BasicExpertInfo: + +def get_author(author: Any) -> BasicExpertInfo: return BasicExpertInfo( display_name=author.get("name"), first_name=author.get("name").split(" ")[0], - last_name=author.get("name").split(" ")[1] - + last_name=author.get("name").split(" ")[1], ) @@ -52,10 +53,7 @@ def _convert_merge_request_to_document(mr: Any) -> Document: # due to local time discrepancies with UTC doc_updated_at=mr.updated_at.replace(tzinfo=timezone.utc), primary_owners=[get_author(mr.author)], - metadata={ - "state": mr.state, - "type": "MergeRequest" - }, + metadata={"state": mr.state, "type": "MergeRequest"}, ) @@ -70,41 +68,42 @@ def _convert_issue_to_document(issue: Any) -> Document: # due to local time discrepancies with UTC doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc), primary_owners=[get_author(issue.author)], - metadata={ - "state": issue.state, - "type": issue.type | "Issue" - }, + metadata={"state": issue.state, "type": issue.type | "Issue"}, ) + class GitlabConnector(LoadConnector, PollConnector): - def __init__(self, - project_owner: str, - project_name: str, - batch_size: int = INDEX_BATCH_SIZE, - state_filter: str = "all", - include_mrs: bool = True, - include_issues: bool = True, - ) -> None: - self.project_owner=project_owner, - self.project_name=project_name, - self.batch_size=batch_size, - self.state_filter=state_filter, - self.include_mrs=include_mrs, - self.include_issues=include_issues, - self.gitlab_client :gitlab.Gitlab | None = None - - + def __init__( + self, + project_owner: str, + project_name: str, + batch_size: int = INDEX_BATCH_SIZE, + state_filter: str = "all", + include_mrs: bool = True, + include_issues: bool = True, + ) -> None: + self.project_owner = project_owner + self.project_name = project_name + self.batch_size = batch_size + self.state_filter = state_filter + self.include_mrs = include_mrs + self.include_issues = include_issues + self.gitlab_client: gitlab.Gitlab | None = None def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: - self.gitlab_client = gitlab.Gitlab(credentials["gitlab_url"], private_token=credentials['gitlab_access_token']) + self.gitlab_client = gitlab.Gitlab( + credentials["gitlab_url"], private_token=credentials["gitlab_access_token"] + ) return None - - - def _fetch_from_gitlab(self, start: datetime | None = None, end: datetime | None = None) -> GenerateDocumentsOutput: + def _fetch_from_gitlab( + self, start: datetime | None = None, end: datetime | None = None + ) -> GenerateDocumentsOutput: if self.gitlab_client is None: raise ConnectorMissingCredentialError("Gitlab") - project = self.gitlab_client.projects.get(f"{self.project_owner[0]}/{self.project_name[0]}") + project = self.gitlab_client.projects.get( + f"{self.project_owner[0]}/{self.project_name[0]}" + ) if self.include_mrs: merge_requests = project.mergerequests.list( @@ -112,9 +111,11 @@ class GitlabConnector(LoadConnector, PollConnector): ) for mr_batch in _batch_gitlab_objects(merge_requests, self.batch_size): - doc_batch =[] + doc_batch: list[Document] = [] for mr in mr_batch: - mr.updated_at = datetime.strptime(mr.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ") + mr.updated_at = datetime.strptime( + mr.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ" + ) if start is not None and mr.updated_at < start: yield doc_batch return @@ -124,21 +125,21 @@ class GitlabConnector(LoadConnector, PollConnector): yield doc_batch if self.include_issues: - issues = project.issues.list( - state=self.state_filter - ) + issues = project.issues.list(state=self.state_filter) for issue_batch in _batch_gitlab_objects(issues, self.batch_size): - doc_batch =[] + doc_batch = [] for issue in issue_batch: - issue.updated_at = datetime.strptime(issue.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ") + issue.updated_at = datetime.strptime( + issue.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ" + ) if start is not None and issue.updated_at < start: yield doc_batch return if end is not None and issue.updated_at > end: continue - if issue.updated_at is not None : - # MRs are handled separately + if issue.updated_at is not None: + # MRs are handled separately continue doc_batch.append(_convert_issue_to_document(issue)) yield doc_batch @@ -146,19 +147,17 @@ class GitlabConnector(LoadConnector, PollConnector): def load_from_state(self) -> GenerateDocumentsOutput: return self._fetch_from_gitlab() - def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput: + def poll_source( + self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch + ) -> GenerateDocumentsOutput: start_datetime = datetime.utcfromtimestamp(start) end_datetime = datetime.utcfromtimestamp(end) return self._fetch_from_gitlab(start_datetime, end_datetime) - - - - - if __name__ == "__main__": import os + connector = GitlabConnector( # gitlab_url="https://gitlab.com/api/v4", project_owner=os.environ["PROJECT_OWNER"], @@ -172,11 +171,8 @@ if __name__ == "__main__": connector.load_credentials( { "github_access_token": os.environ["GITLAB_ACCESS_TOKEN"], - "gitlab_url":os.environ["GITLAB_URL"] + "gitlab_url": os.environ["GITLAB_URL"], } ) document_batches = connector.load_from_state() print(next(document_batches)) - - - diff --git a/web/src/app/admin/connectors/gitlab/page.tsx b/web/src/app/admin/connectors/gitlab/page.tsx index 46dd9fc5a..37eddca30 100644 --- a/web/src/app/admin/connectors/gitlab/page.tsx +++ b/web/src/app/admin/connectors/gitlab/page.tsx @@ -108,7 +108,8 @@ const Main = () => { formBody={ <> - If you are using GitLab Cloud, keep the default value below + If you are using GitLab Cloud, keep the default value below + { })} initialValues={{ gitlab_access_token: "", - gitlab_url: "https://gitlab.com" + gitlab_url: "https://gitlab.com", }} onSubmit={(isSuccess) => { if (isSuccess) { diff --git a/web/src/components/admin/connectors/ConnectorTitle.tsx b/web/src/components/admin/connectors/ConnectorTitle.tsx index c6baf4666..e7b895266 100644 --- a/web/src/components/admin/connectors/ConnectorTitle.tsx +++ b/web/src/components/admin/connectors/ConnectorTitle.tsx @@ -45,7 +45,7 @@ export const ConnectorTitle = ({ "Repo", `${typedConnector.connector_specific_config.project_owner}/${typedConnector.connector_specific_config.project_name}` ); - } else if (connector.source === "confluence") { + } else if (connector.source === "confluence") { const typedConnector = connector as Connector; additionalMetadata.set( "Wiki URL", diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index 9c49c1040..3d2fe20eb 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -356,7 +356,6 @@ export const GithubIcon = ({ ); }; - export const GoogleDriveIcon = ({ size = 16, className = defaultTailwindCSS, diff --git a/web/src/lib/sources.ts b/web/src/lib/sources.ts index b82f79d58..2e654a2f5 100644 --- a/web/src/lib/sources.ts +++ b/web/src/lib/sources.ts @@ -61,10 +61,10 @@ const SOURCE_METADATA_MAP: SourceMap = { displayName: "Github", category: SourceCategory.AppConnection, }, - gitlab :{ - icon:GitlabIcon, - displayName:"Gitlab", - category:SourceCategory.AppConnection, + gitlab: { + icon: GitlabIcon, + displayName: "Gitlab", + category: SourceCategory.AppConnection, }, confluence: { icon: ConfluenceIcon, diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 0827ba512..69d5dfe4c 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -85,7 +85,6 @@ export interface GitlabConfig { include_issues: boolean; } - export interface GoogleDriveConfig { folder_paths?: string[]; include_shared?: boolean; @@ -199,7 +198,7 @@ export interface GithubCredentialJson { } export interface GitlabCredentialJson { - gitlab_url:string, + gitlab_url: string; gitlab_access_token: string; }