Fix GitLabs CI (#965)

This commit is contained in:
Yuhong Sun 2024-01-18 16:12:46 -08:00 committed by GitHub
parent 1670d923aa
commit 5edc464c9a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 62 additions and 67 deletions

View File

@ -1,18 +1,20 @@
import itertools
from collections.abc import Iterable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import Any
import gitlab
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import BasicExpertInfo, ConnectorMissingCredentialError
from danswer.connectors.models import BasicExpertInfo
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
@ -22,22 +24,21 @@ logger = setup_logger()
def _batch_gitlab_objects(
git_objs: list[Any],
batch_size: int
git_objs: Iterable[Any], batch_size: int
) -> Iterator[list[Any]]:
it = iter(git_objs)
while True:
batch = list(itertools.islice(it, batch_size[0]))
batch = list(itertools.islice(it, batch_size))
if not batch:
break
yield batch
def get_author(author: Any) -> BasicExpertInfo:
    """Build a ``BasicExpertInfo`` from a GitLab author record.

    Args:
        author: Mapping-like object whose ``.get("name")`` returns the
            author's full name (or ``None`` when it is absent).

    Returns:
        A ``BasicExpertInfo`` whose ``display_name`` is the raw name, whose
        ``first_name`` is the first whitespace-separated token and whose
        ``last_name`` is the second token. Missing parts are ``None``.
    """
    name = author.get("name")
    # Split on any run of whitespace so "Jane  Doe" does not produce an empty
    # token, and tolerate a missing or empty name instead of raising.
    parts = name.split() if name else []
    # NOTE(review): assumes BasicExpertInfo accepts None for first_name and
    # last_name - confirm against the model definition.
    return BasicExpertInfo(
        display_name=name,
        first_name=parts[0] if parts else None,
        # Single-word names have no second token; indexing [1] unconditionally
        # raised IndexError for them.
        last_name=parts[1] if len(parts) > 1 else None,
    )
@ -52,10 +53,7 @@ def _convert_merge_request_to_document(mr: Any) -> Document:
# due to local time discrepancies with UTC
doc_updated_at=mr.updated_at.replace(tzinfo=timezone.utc),
primary_owners=[get_author(mr.author)],
metadata={
"state": mr.state,
"type": "MergeRequest"
},
metadata={"state": mr.state, "type": "MergeRequest"},
)
@ -70,41 +68,42 @@ def _convert_issue_to_document(issue: Any) -> Document:
# due to local time discrepancies with UTC
doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),
primary_owners=[get_author(issue.author)],
metadata={
"state": issue.state,
"type": issue.type | "Issue"
},
metadata={"state": issue.state, "type": issue.type | "Issue"},
)
class GitlabConnector(LoadConnector, PollConnector):
def __init__(
    self,
    project_owner: str,
    project_name: str,
    batch_size: int = INDEX_BATCH_SIZE,
    state_filter: str = "all",
    include_mrs: bool = True,
    include_issues: bool = True,
) -> None:
    """Record the connector configuration; no GitLab client is created here.

    Args:
        project_owner: Owner part of the ``owner/name`` project path.
        project_name: Name part of the ``owner/name`` project path.
        batch_size: Batch size handed to ``_batch_gitlab_objects`` when
            fetching merge requests and issues.
        state_filter: Forwarded as the ``state`` filter when listing issues.
        include_mrs: Whether merge requests are fetched.
        include_issues: Whether issues are fetched.
    """
    # Plain assignments on purpose: a trailing comma after any of these values
    # would store a 1-tuple instead of the value itself.
    # NOTE(review): _fetch_from_gitlab builds the project path from
    # ``self.project_owner[0]`` / ``self.project_name[0]``; on plain strings
    # that yields only the first character - confirm and drop the index there.
    self.project_owner = project_owner
    self.project_name = project_name
    self.batch_size = batch_size
    self.state_filter = state_filter
    self.include_mrs = include_mrs
    self.include_issues = include_issues
    # Stays None until load_credentials() is called.
    self.gitlab_client: gitlab.Gitlab | None = None
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
    """Create the GitLab client from the supplied credentials.

    Args:
        credentials: Must contain ``"gitlab_url"`` (passed as the first
            argument to ``gitlab.Gitlab``) and ``"gitlab_access_token"``
            (passed as ``private_token``).

    Returns:
        Always ``None``.

    Raises:
        KeyError: If either credential key is missing.
    """
    # Build the client exactly once and keep it on the instance.
    self.gitlab_client = gitlab.Gitlab(
        credentials["gitlab_url"], private_token=credentials["gitlab_access_token"]
    )
    return None
def _fetch_from_gitlab(self, start: datetime | None = None, end: datetime | None = None) -> GenerateDocumentsOutput:
def _fetch_from_gitlab(
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
if self.gitlab_client is None:
raise ConnectorMissingCredentialError("Gitlab")
project = self.gitlab_client.projects.get(f"{self.project_owner[0]}/{self.project_name[0]}")
project = self.gitlab_client.projects.get(
f"{self.project_owner[0]}/{self.project_name[0]}"
)
if self.include_mrs:
merge_requests = project.mergerequests.list(
@ -112,9 +111,11 @@ class GitlabConnector(LoadConnector, PollConnector):
)
for mr_batch in _batch_gitlab_objects(merge_requests, self.batch_size):
doc_batch =[]
doc_batch: list[Document] = []
for mr in mr_batch:
mr.updated_at = datetime.strptime(mr.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ")
mr.updated_at = datetime.strptime(
mr.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ"
)
if start is not None and mr.updated_at < start:
yield doc_batch
return
@ -124,21 +125,21 @@ class GitlabConnector(LoadConnector, PollConnector):
yield doc_batch
if self.include_issues:
issues = project.issues.list(
state=self.state_filter
)
issues = project.issues.list(state=self.state_filter)
for issue_batch in _batch_gitlab_objects(issues, self.batch_size):
doc_batch =[]
doc_batch = []
for issue in issue_batch:
issue.updated_at = datetime.strptime(issue.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ")
issue.updated_at = datetime.strptime(
issue.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ"
)
if start is not None and issue.updated_at < start:
yield doc_batch
return
if end is not None and issue.updated_at > end:
continue
if issue.updated_at is not None :
# MRs are handled separately
if issue.updated_at is not None:
# MRs are handled separately
continue
doc_batch.append(_convert_issue_to_document(issue))
yield doc_batch
@ -146,19 +147,17 @@ class GitlabConnector(LoadConnector, PollConnector):
def load_from_state(self) -> GenerateDocumentsOutput:
    """Return document batches for the project with no time bounds applied."""
    # Both bounds default to None in _fetch_from_gitlab; spelled out here so
    # the "no time window" intent is visible at the call site.
    return self._fetch_from_gitlab(start=None, end=None)
def poll_source(
    self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
    """Return document batches restricted to the given time window.

    Args:
        start: Lower bound, in seconds since the Unix epoch.
        end: Upper bound, in seconds since the Unix epoch.

    Returns:
        Whatever ``_fetch_from_gitlab`` produces for the converted bounds.
    """
    # datetime.utcfromtimestamp() is deprecated; convert through an aware UTC
    # datetime instead. tzinfo is then dropped because _fetch_from_gitlab
    # compares these bounds against naive datetimes produced by strptime, and
    # naive/aware comparisons raise TypeError.
    start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).replace(
        tzinfo=None
    )
    end_datetime = datetime.fromtimestamp(end, tz=timezone.utc).replace(tzinfo=None)
    return self._fetch_from_gitlab(start_datetime, end_datetime)
if __name__ == "__main__":
import os
connector = GitlabConnector(
# gitlab_url="https://gitlab.com/api/v4",
project_owner=os.environ["PROJECT_OWNER"],
@ -172,11 +171,8 @@ if __name__ == "__main__":
connector.load_credentials(
    {
        # load_credentials looks up "gitlab_access_token" and "gitlab_url";
        # any other key name (e.g. "github_access_token") raises KeyError.
        "gitlab_access_token": os.environ["GITLAB_ACCESS_TOKEN"],
        "gitlab_url": os.environ["GITLAB_URL"],
    }
)
# Smoke test: pull and print only the first batch of documents.
document_batches = connector.load_from_state()
print(next(document_batches))

View File

@ -108,7 +108,8 @@ const Main = () => {
formBody={
<>
<Text>
If you are using GitLab Cloud, keep the default value below</Text>
If you are using GitLab Cloud, keep the default value below
</Text>
<TextFormField
name="gitlab_url"
label="GitLab URL:"
@ -131,7 +132,7 @@ const Main = () => {
})}
initialValues={{
gitlab_access_token: "",
gitlab_url: "https://gitlab.com"
gitlab_url: "https://gitlab.com",
}}
onSubmit={(isSuccess) => {
if (isSuccess) {

View File

@ -45,7 +45,7 @@ export const ConnectorTitle = ({
"Repo",
`${typedConnector.connector_specific_config.project_owner}/${typedConnector.connector_specific_config.project_name}`
);
} else if (connector.source === "confluence") {
} else if (connector.source === "confluence") {
const typedConnector = connector as Connector<ConfluenceConfig>;
additionalMetadata.set(
"Wiki URL",

View File

@ -356,7 +356,6 @@ export const GithubIcon = ({
);
};
export const GoogleDriveIcon = ({
size = 16,
className = defaultTailwindCSS,

View File

@ -61,10 +61,10 @@ const SOURCE_METADATA_MAP: SourceMap = {
displayName: "Github",
category: SourceCategory.AppConnection,
},
gitlab :{
icon:GitlabIcon,
displayName:"Gitlab",
category:SourceCategory.AppConnection,
gitlab: {
icon: GitlabIcon,
displayName: "Gitlab",
category: SourceCategory.AppConnection,
},
confluence: {
icon: ConfluenceIcon,

View File

@ -85,7 +85,6 @@ export interface GitlabConfig {
include_issues: boolean;
}
export interface GoogleDriveConfig {
folder_paths?: string[];
include_shared?: boolean;
@ -199,7 +198,7 @@ export interface GithubCredentialJson {
}
// Credential payload for the GitLab connector.
export interface GitlabCredentialJson {
  // URL of the GitLab instance; declared once (a repeated member is a
  // duplicate-identifier error).
  gitlab_url: string;
  // Access token submitted together with the URL.
  gitlab_access_token: string;
}