Gitlab Connector (#931)

This commit is contained in:
Rutik Thakre 2024-01-19 05:13:17 +05:30 committed by GitHub
parent 1981a02473
commit 1670d923aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 481 additions and 2 deletions

View File

@ -63,6 +63,7 @@ class DocumentSource(str, Enum):
GOOGLE_DRIVE = "google_drive"
REQUESTTRACKER = "requesttracker"
GITHUB = "github"
GITLAB = "gitlab"
GURU = "guru"
BOOKSTACK = "bookstack"
CONFLUENCE = "confluence"

View File

@ -8,6 +8,7 @@ from danswer.connectors.danswer_jira.connector import JiraConnector
from danswer.connectors.document360.connector import Document360Connector
from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.gitlab.connector import GitlabConnector
from danswer.connectors.gong.connector import GongConnector
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.google_site.connector import GoogleSitesConnector
@ -47,6 +48,7 @@ def identify_connector_class(
InputType.POLL: SlackPollConnector,
},
DocumentSource.GITHUB: GithubConnector,
DocumentSource.GITLAB: GitlabConnector,
DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
DocumentSource.BOOKSTACK: BookstackConnector,
DocumentSource.CONFLUENCE: ConfluenceConnector,

View File

@ -0,0 +1,182 @@
import itertools
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import Any
import gitlab
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import BasicExpertInfo, ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
logger = setup_logger()
def _batch_gitlab_objects(
git_objs: list[Any],
batch_size: int
) -> Iterator[list[Any]]:
it = iter(git_objs)
while True:
batch = list(itertools.islice(it, batch_size[0]))
if not batch:
break
yield batch
def get_author(author: Any) -> BasicExpertInfo:
    """Build a ``BasicExpertInfo`` from a GitLab author mapping.

    Args:
        author: Mapping that is read with ``.get("name")``. ``None`` or a
            mapping without a usable name is tolerated.

    Returns:
        A ``BasicExpertInfo`` whose ``display_name`` is the full name,
        ``first_name`` the first whitespace-separated token and ``last_name``
        the second token. Fields that cannot be derived are left unset.
    """
    name = author.get("name") if author else None
    # ``split()`` without arguments also copes with repeated spaces.
    name_parts = name.split() if name else []

    # Only pass what is actually known: indexing ``[1]`` unconditionally used
    # to raise IndexError for single-word names such as bot accounts.
    # NOTE(review): assumes every BasicExpertInfo field is optional - confirm
    # against the model definition.
    expert_fields: dict[str, str] = {}
    if name:
        expert_fields["display_name"] = name
    if name_parts:
        expert_fields["first_name"] = name_parts[0]
    if len(name_parts) > 1:
        # Kept as the second token so two-word names map exactly as before.
        expert_fields["last_name"] = name_parts[1]
    return BasicExpertInfo(**expert_fields)
def _convert_merge_request_to_document(mr: Any) -> Document:
    """Translate a GitLab merge request into an indexable ``Document``.

    Args:
        mr: Merge request object exposing ``web_url``, ``description``,
            ``title``, ``state``, ``author`` and an ``updated_at`` that has
            already been parsed into a ``datetime`` (``.replace`` is called
            on it).

    Returns:
        A ``Document`` identified by the merge request URL, with the
        description as its only section.
    """
    url = mr.web_url
    body = Section(link=url, text=mr.description or "")
    # The parsed timestamp is UTC but carries no tzinfo. Tag it explicitly,
    # because indexing guards against wrongly timestamped docs caused by
    # local-time vs UTC discrepancies.
    updated_utc = mr.updated_at.replace(tzinfo=timezone.utc)
    owners = [get_author(mr.author)]
    mr_metadata = {"state": mr.state, "type": "MergeRequest"}

    return Document(
        id=url,
        sections=[body],
        source=DocumentSource.GITLAB,
        semantic_identifier=mr.title,
        doc_updated_at=updated_utc,
        primary_owners=owners,
        metadata=mr_metadata,
    )
def _convert_issue_to_document(issue: Any) -> Document:
    """Translate a GitLab issue into an indexable ``Document``.

    Args:
        issue: Issue object exposing ``web_url``, ``description``, ``title``,
            ``state``, ``author`` and an ``updated_at`` that has already been
            parsed into a ``datetime`` (``.replace`` is called on it). A
            ``type`` attribute is used when present and non-empty.

    Returns:
        A ``Document`` identified by the issue URL, with the description as
        its only section.
    """
    # Fall back to "Issue" when the object carries no type. The previous
    # ``issue.type | "Issue"`` applied a bitwise OR to strings, which raises
    # TypeError instead of providing a default.
    issue_type = getattr(issue, "type", None) or "Issue"

    return Document(
        id=issue.web_url,
        sections=[Section(link=issue.web_url, text=issue.description or "")],
        source=DocumentSource.GITLAB,
        semantic_identifier=issue.title,
        # updated_at is UTC time but is timezone unaware, explicitly add UTC
        # as there is logic in indexing to prevent wrong timestamped docs
        # due to local time discrepancies with UTC
        doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),
        primary_owners=[get_author(issue.author)],
        metadata={
            "state": issue.state,
            "type": issue_type,
        },
    )
class GitlabConnector(LoadConnector, PollConnector):
    """Connector that indexes the merge requests and issues of one GitLab project.

    The project is addressed as ``"<project_owner>/<project_name>"``.
    Credentials must be supplied through ``load_credentials`` before any
    documents can be fetched.
    """

    def __init__(
        self,
        project_owner: str,
        project_name: str,
        batch_size: int = INDEX_BATCH_SIZE,
        state_filter: str = "all",
        include_mrs: bool = True,
        include_issues: bool = True,
    ) -> None:
        """Store the connector configuration.

        Args:
            project_owner: First path segment of the project.
            project_name: Second path segment of the project.
            batch_size: Maximum number of documents per yielded batch.
            state_filter: Value passed as ``state`` when listing merge
                requests and issues.
            include_mrs: Whether merge requests are indexed.
            include_issues: Whether issues are indexed.
        """
        # Plain assignments on purpose: a trailing comma after each value
        # would store 1-tuples, making ``include_mrs=False`` truthy and
        # sending a tuple as the ``state`` filter.
        self.project_owner = project_owner
        self.project_name = project_name
        self.batch_size = batch_size
        self.state_filter = state_filter
        self.include_mrs = include_mrs
        self.include_issues = include_issues
        self.gitlab_client: gitlab.Gitlab | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Create the GitLab client.

        Args:
            credentials: Must contain ``gitlab_url`` and
                ``gitlab_access_token``.

        Returns:
            Always ``None``.

        Raises:
            KeyError: If either credential key is missing.
        """
        self.gitlab_client = gitlab.Gitlab(
            credentials["gitlab_url"],
            private_token=credentials["gitlab_access_token"],
        )
        return None

    def _documents_in_window(
        self,
        gitlab_objs: Any,
        convert: Any,
        start: datetime | None,
        end: datetime | None,
    ) -> GenerateDocumentsOutput:
        """Convert objects updated within ``[start, end]`` and yield them in batches.

        ``gitlab_objs`` must be ordered by ``updated_at`` descending, since
        iteration stops at the first object older than ``start``. ``convert``
        is the callable that turns one object into a ``Document``.
        """
        doc_batch = []
        for obj in gitlab_objs:
            # The converters expect ``updated_at`` to already be a datetime,
            # so the parsed value is written back onto the object.
            obj.updated_at = datetime.strptime(
                obj.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ"
            )
            if start is not None and obj.updated_at < start:
                # Everything after this point is older still.
                break
            if end is not None and obj.updated_at > end:
                continue
            doc_batch.append(convert(obj))
            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []
        if doc_batch:
            yield doc_batch

    def _fetch_from_gitlab(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        """Yield document batches for merge requests, then issues.

        Args:
            start: Naive UTC lower bound on ``updated_at``; ``None`` disables it.
            end: Naive UTC upper bound on ``updated_at``; ``None`` disables it.

        Raises:
            ConnectorMissingCredentialError: If ``load_credentials`` has not
                been called. Raised when iteration starts.
        """
        if self.gitlab_client is None:
            raise ConnectorMissingCredentialError("Gitlab")

        project = self.gitlab_client.projects.get(
            f"{self.project_owner}/{self.project_name}"
        )

        # ``iterator=True`` asks python-gitlab to page through every result
        # lazily; per its documentation a bare ``list()`` returns only the
        # first page.
        if self.include_mrs:
            merge_requests = project.mergerequests.list(
                state=self.state_filter,
                order_by="updated_at",
                sort="desc",
                iterator=True,
            )
            # Delegating keeps the early exit local to merge requests, so
            # reaching an old merge request no longer prevents issues from
            # being fetched.
            yield from self._documents_in_window(
                merge_requests, _convert_merge_request_to_document, start, end
            )

        if self.include_issues:
            # Ordered like the merge requests so the early exit on ``start``
            # cannot drop newer issues listed after an older one.
            issues = project.issues.list(
                state=self.state_filter,
                order_by="updated_at",
                sort="desc",
                iterator=True,
            )
            yield from self._documents_in_window(
                issues, _convert_issue_to_document, start, end
            )

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield every merge request and issue, without a time window."""
        return self._fetch_from_gitlab()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield merge requests and issues updated between ``start`` and ``end``.

        Both bounds are epoch seconds. They are converted to naive UTC
        datetimes so they compare directly with the naive parsed
        ``updated_at`` values.
        """
        start_datetime = datetime.utcfromtimestamp(start)
        end_datetime = datetime.utcfromtimestamp(end)
        return self._fetch_from_gitlab(start_datetime, end_datetime)
if __name__ == "__main__":
    # Manual smoke test: fetch and print the first batch of documents for the
    # project named by the PROJECT_OWNER / PROJECT_NAME environment variables.
    import os

    connector = GitlabConnector(
        project_owner=os.environ["PROJECT_OWNER"],
        project_name=os.environ["PROJECT_NAME"],
        batch_size=10,
        state_filter="all",
        include_mrs=True,
        include_issues=True,
    )

    connector.load_credentials(
        {
            # Must be "gitlab_access_token": load_credentials reads exactly
            # that key, so the former "github_access_token" raised KeyError.
            "gitlab_access_token": os.environ["GITLAB_ACCESS_TOKEN"],
            "gitlab_url": os.environ["GITLAB_URL"],
        }
    )
    document_batches = connector.load_from_state()
    print(next(document_batches))

View File

@ -36,6 +36,7 @@ psycopg2-binary==2.9.9
pycryptodome==3.19.1
pydantic==1.10.7
PyGithub==1.58.2
python-gitlab==3.9.0
pypdf==3.17.0
pytest-playwright==0.3.2
python-dotenv==1.0.0

BIN
web/public/Gitlab.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

View File

@ -0,0 +1,253 @@
"use client";
import * as Yup from "yup";
import { GitlabIcon, TrashIcon } from "@/components/icons/icons";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import useSWR, { useSWRConfig } from "swr";
import { fetcher } from "@/lib/fetcher";
import {
GitlabConfig,
GitlabCredentialJson,
Credential,
ConnectorIndexingStatus,
} from "@/lib/types";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { LoadingAnimation } from "@/components/Loading";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { usePublicCredentials } from "@/lib/hooks";
import { Card, Divider, Text, Title } from "@tremor/react";
import { AdminPageTitle } from "@/components/admin/Title";
/**
 * Body of the GitLab connector admin page.
 *
 * Step 1 shows the stored access token (with a delete button) or a form to
 * create one. Step 2 lists the existing GitLab connectors and, once a
 * credential exists, offers a form to connect another project.
 */
const Main = () => {
  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );

  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  // Show the spinner only while there is nothing to render yet.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  // Only connectors whose source is "gitlab" belong on this page.
  const gitlabConnectorIndexingStatuses: ConnectorIndexingStatus<
    GitlabConfig,
    GitlabCredentialJson
  >[] = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "gitlab"
  );

  // A credential counts as a GitLab credential when it carries a GitLab token.
  const gitlabCredential: Credential<GitlabCredentialJson> | undefined =
    credentialsData.find(
      (credential) => credential.credential_json?.gitlab_access_token
    );

  return (
    <>
      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your access token
      </Title>
      {gitlabCredential ? (
        <>
          {" "}
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing Access Token: </p>
            <p className="ml-1 italic my-auto">
              {gitlabCredential.credential_json.gitlab_access_token}
            </p>{" "}
            <button
              className="ml-1 hover:bg-hover rounded p-1"
              onClick={async () => {
                await adminDeleteCredential(gitlabCredential.id);
                refreshCredentials();
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <Text>
            If you don&apos;t have an access token, read the guide{" "}
            {/* rel keeps the newly opened tab from reaching window.opener */}
            <a
              className="text-blue-500"
              href="https://docs.danswer.dev/connectors/gitlab"
              target="_blank"
              rel="noopener noreferrer"
            >
              here
            </a>{" "}
            on how to get one from Gitlab.
          </Text>
          <Card className="mt-4">
            <CredentialForm<GitlabCredentialJson>
              formBody={
                <>
                  <Text>
                    If you are using GitLab Cloud, keep the default value below
                  </Text>
                  <TextFormField
                    name="gitlab_url"
                    label="GitLab URL:"
                    type="text"
                    placeholder="https://gitlab.com"
                  />
                  <TextFormField
                    name="gitlab_access_token"
                    label="Access Token:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                gitlab_url: Yup.string().default("https://gitlab.com"),
                gitlab_access_token: Yup.string().required(
                  "Please enter the access token for Gitlab"
                ),
              })}
              initialValues={{
                gitlab_access_token: "",
                gitlab_url: "https://gitlab.com",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  refreshCredentials();
                }
              }}
            />
          </Card>
        </>
      )}

      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 2: Which repositories do you want to make searchable?
      </Title>

      {gitlabConnectorIndexingStatuses.length > 0 && (
        <>
          <Text className="mb-2">
            We pull the latest Merge Requests and Issues from each project
            listed below every <b>10</b> minutes.
          </Text>
          <div className="mb-2">
            <ConnectorsTable<GitlabConfig, GitlabCredentialJson>
              connectorIndexingStatuses={gitlabConnectorIndexingStatuses}
              liveCredential={gitlabCredential}
              getCredential={(credential) =>
                credential.credential_json.gitlab_access_token
              }
              onCredentialLink={async (connectorId) => {
                if (gitlabCredential) {
                  await linkCredential(connectorId, gitlabCredential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
              specialColumns={[
                {
                  header: "Project",
                  key: "project",
                  getValue: (ccPairStatus) => {
                    const connectorConfig =
                      ccPairStatus.connector.connector_specific_config;
                    return `${connectorConfig.project_owner}/${connectorConfig.project_name}`;
                  },
                },
              ]}
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
            />
          </div>
          <Divider />
        </>
      )}

      {gitlabCredential ? (
        <Card className="mt-4">
          <h2 className="font-bold mb-3">Connect to a New Project</h2>
          <ConnectorForm<GitlabConfig>
            nameBuilder={(values) =>
              `GitlabConnector-${values.project_owner}/${values.project_name}`
            }
            ccPairNameBuilder={(values) =>
              `${values.project_owner}/${values.project_name}`
            }
            source="gitlab"
            inputType="poll"
            formBody={
              <>
                <TextFormField name="project_owner" label="Project Owner:" />
                <TextFormField name="project_name" label="Project Name:" />
              </>
            }
            validationSchema={Yup.object().shape({
              project_owner: Yup.string().required(
                "Please enter the owner of the project to index e.g. danswer-ai"
              ),
              project_name: Yup.string().required(
                "Please enter the name of the project to index e.g. danswer "
              ),
              include_mrs: Yup.boolean().required(),
              include_issues: Yup.boolean().required(),
            })}
            initialValues={{
              project_owner: "",
              project_name: "",
              include_mrs: true,
              include_issues: true,
            }}
            refreshFreq={10 * 60} // 10 minutes
            credentialId={gitlabCredential.id}
          />
        </Card>
      ) : (
        <Text>
          Please provide your access token in Step 1 first! Once done with that,
          you can then specify which Gitlab repositories you want to make
          searchable.
        </Text>
      )}
    </>
  );
};
/**
 * GitLab connector admin page: health banner, page title and the
 * credential/connector management UI rendered by `Main`.
 */
export default function Page() {
  const titleIcon = <GitlabIcon size={32} />;

  return (
    <div className="container mx-auto">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>
      <AdminPageTitle icon={titleIcon} title="Gitlab MRs + Issues" />
      <Main />
    </div>
  );
}

View File

@ -3,6 +3,7 @@ import {
ConfluenceConfig,
Connector,
GithubConfig,
GitlabConfig,
GoogleDriveConfig,
JiraConfig,
SlackConfig,
@ -38,7 +39,13 @@ export const ConnectorTitle = ({
"Repo",
`${typedConnector.connector_specific_config.repo_owner}/${typedConnector.connector_specific_config.repo_name}`
);
} else if (connector.source === "confluence") {
} else if (connector.source === "gitlab") {
const typedConnector = connector as Connector<GitlabConfig>;
additionalMetadata.set(
"Repo",
`${typedConnector.connector_specific_config.project_owner}/${typedConnector.connector_specific_config.project_name}`
);
} else if (connector.source === "confluence") {
const typedConnector = connector as Connector<ConfluenceConfig>;
additionalMetadata.set(
"Wiki URL",

View File

@ -329,7 +329,19 @@ export const SlackIcon = ({
</div>
);
};
/**
 * GitLab logo rendered from `/Gitlab.png` inside a square wrapper whose
 * side length is `size` pixels.
 */
export const GitlabIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  const sidePx = `${size}px`;
  const wrapperClasses = `w-[${sidePx}] h-[${sidePx}] ` + className;

  return (
    <div style={{ width: sidePx, height: sidePx }} className={wrapperClasses}>
      <Image src="/Gitlab.png" alt="Logo" width="96" height="96" />
    </div>
  );
};
export const GithubIcon = ({
size = 16,
className = defaultTailwindCSS,
@ -344,6 +356,7 @@ export const GithubIcon = ({
);
};
export const GoogleDriveIcon = ({
size = 16,
className = defaultTailwindCSS,

View File

@ -4,6 +4,7 @@ import {
Document360Icon,
FileIcon,
GithubIcon,
GitlabIcon,
GlobeIcon,
GongIcon,
GoogleDriveIcon,
@ -60,6 +61,11 @@ const SOURCE_METADATA_MAP: SourceMap = {
displayName: "Github",
category: SourceCategory.AppConnection,
},
gitlab :{
icon:GitlabIcon,
displayName:"Gitlab",
category:SourceCategory.AppConnection,
},
confluence: {
icon: ConfluenceIcon,
displayName: "Confluence",

View File

@ -12,6 +12,7 @@ export interface User {
export type ValidSources =
| "web"
| "github"
| "gitlab"
| "slack"
| "google_drive"
| "bookstack"
@ -77,6 +78,14 @@ export interface GithubConfig {
include_issues: boolean;
}
/**
 * Connector-specific configuration of a GitLab connector.
 * The project is displayed as `${project_owner}/${project_name}`.
 */
export interface GitlabConfig {
// First path segment of the project.
project_owner: string;
// Second path segment of the project.
project_name: string;
// Whether merge requests are indexed.
include_mrs: boolean;
// Whether issues are indexed.
include_issues: boolean;
}
export interface GoogleDriveConfig {
folder_paths?: string[];
include_shared?: boolean;
@ -189,6 +198,11 @@ export interface GithubCredentialJson {
github_access_token: string;
}
/** Credential payload stored for the GitLab connector. */
export interface GitlabCredentialJson {
  /** URL of the GitLab instance; the credential form defaults it to https://gitlab.com. */
  gitlab_url: string;
  /** Access token entered in the credential form. */
  gitlab_access_token: string;
}
export interface BookstackCredentialJson {
bookstack_base_url: string;
bookstack_api_token_id: string;