mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-26 17:51:54 +01:00
Gitlab Connector (#931)
This commit is contained in:
parent
1981a02473
commit
1670d923aa
@ -63,6 +63,7 @@ class DocumentSource(str, Enum):
|
||||
GOOGLE_DRIVE = "google_drive"
|
||||
REQUESTTRACKER = "requesttracker"
|
||||
GITHUB = "github"
|
||||
GITLAB = "gitlab"
|
||||
GURU = "guru"
|
||||
BOOKSTACK = "bookstack"
|
||||
CONFLUENCE = "confluence"
|
||||
|
@ -8,6 +8,7 @@ from danswer.connectors.danswer_jira.connector import JiraConnector
|
||||
from danswer.connectors.document360.connector import Document360Connector
|
||||
from danswer.connectors.file.connector import LocalFileConnector
|
||||
from danswer.connectors.github.connector import GithubConnector
|
||||
from danswer.connectors.gitlab.connector import GitlabConnector
|
||||
from danswer.connectors.gong.connector import GongConnector
|
||||
from danswer.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from danswer.connectors.google_site.connector import GoogleSitesConnector
|
||||
@ -47,6 +48,7 @@ def identify_connector_class(
|
||||
InputType.POLL: SlackPollConnector,
|
||||
},
|
||||
DocumentSource.GITHUB: GithubConnector,
|
||||
DocumentSource.GITLAB: GitlabConnector,
|
||||
DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
|
||||
DocumentSource.BOOKSTACK: BookstackConnector,
|
||||
DocumentSource.CONFLUENCE: ConfluenceConnector,
|
||||
|
0
backend/danswer/connectors/gitlab/__init__.py
Normal file
0
backend/danswer/connectors/gitlab/__init__.py
Normal file
182
backend/danswer/connectors/gitlab/connector.py
Normal file
182
backend/danswer/connectors/gitlab/connector.py
Normal file
@ -0,0 +1,182 @@
|
||||
|
||||
import itertools
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
import gitlab
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
from danswer.connectors.interfaces import LoadConnector
|
||||
from danswer.connectors.interfaces import PollConnector
|
||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from danswer.connectors.models import BasicExpertInfo, ConnectorMissingCredentialError
|
||||
from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import Section
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def _batch_gitlab_objects(
|
||||
git_objs: list[Any],
|
||||
batch_size: int
|
||||
) -> Iterator[list[Any]]:
|
||||
it = iter(git_objs)
|
||||
while True:
|
||||
batch = list(itertools.islice(it, batch_size[0]))
|
||||
if not batch:
|
||||
break
|
||||
yield batch
|
||||
|
||||
def get_author(author: Any) -> BasicExpertInfo:
    """Build a ``BasicExpertInfo`` from a GitLab author mapping.

    Args:
        author: Mapping that is read through ``.get("name")``.

    Returns:
        Expert info whose ``display_name`` is the full name. ``first_name``
        is the first whitespace-separated word and ``last_name`` is the rest
        of the name. Parts that cannot be derived (single-word or missing
        name) are ``None`` instead of raising ``IndexError``.
    """
    name = author.get("name")
    # split() without a separator ignores leading/repeated whitespace;
    # maxsplit=1 keeps multi-word surnames together.
    parts = name.split(maxsplit=1) if name else []
    # NOTE(review): assumes BasicExpertInfo accepts None for absent name
    # parts - confirm against the model definition.
    return BasicExpertInfo(
        display_name=name,
        first_name=parts[0] if parts else None,
        last_name=parts[1] if len(parts) > 1 else None,
    )
|
||||
|
||||
|
||||
def _convert_merge_request_to_document(mr: Any) -> Document:
    """Translate one GitLab merge request into an indexable ``Document``.

    The merge request's web URL serves as both the document id and the
    section link, its description (or an empty string) is the section text,
    and its title is the semantic identifier.

    Args:
        mr: Merge request object. ``mr.updated_at`` must already be a
            ``datetime`` because ``.replace(tzinfo=...)`` is called on it.
    """
    url = mr.web_url
    body = Section(link=url, text=mr.description or "")
    # The timestamp is in UTC but carries no tzinfo. Attach UTC explicitly:
    # the indexing logic guards against wrongly timestamped docs caused by
    # local-time/UTC discrepancies.
    updated_at_utc = mr.updated_at.replace(tzinfo=timezone.utc)
    return Document(
        id=url,
        sections=[body],
        source=DocumentSource.GITLAB,
        semantic_identifier=mr.title,
        doc_updated_at=updated_at_utc,
        primary_owners=[get_author(mr.author)],
        metadata={"state": mr.state, "type": "MergeRequest"},
    )
|
||||
|
||||
|
||||
def _convert_issue_to_document(issue: Any) -> Document:
    """Translate one GitLab issue into an indexable ``Document``.

    The issue's web URL serves as both the document id and the section link,
    its description (or an empty string) is the section text, and its title
    is the semantic identifier.

    Args:
        issue: Issue object. ``issue.updated_at`` must already be a
            ``datetime`` because ``.replace(tzinfo=...)`` is called on it.
    """
    # Fall back to "Issue" when the object has no (or an empty) type.
    # The previous `issue.type | "Issue"` was a bitwise OR on a string and
    # raised TypeError for every issue.
    issue_type = getattr(issue, "type", None) or "Issue"
    return Document(
        id=issue.web_url,
        sections=[Section(link=issue.web_url, text=issue.description or "")],
        source=DocumentSource.GITLAB,
        semantic_identifier=issue.title,
        # updated_at is UTC time but is timezone unaware, explicitly add UTC
        # as there is logic in indexing to prevent wrong timestamped docs
        # due to local time discrepancies with UTC
        doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),
        primary_owners=[get_author(issue.author)],
        metadata={
            "state": issue.state,
            "type": issue_type,
        },
    )
|
||||
|
||||
class GitlabConnector(LoadConnector, PollConnector):
    """Index the merge requests and issues of a single GitLab project.

    The project is addressed as ``"<project_owner>/<project_name>"``.
    Credentials must be supplied through ``load_credentials`` before any
    documents can be fetched.
    """

    # Layout of the ``updated_at`` strings parsed in ``_batch_documents``.
    _TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"

    def __init__(
        self,
        project_owner: str,
        project_name: str,
        batch_size: int = INDEX_BATCH_SIZE,
        state_filter: str = "all",
        include_mrs: bool = True,
        include_issues: bool = True,
    ) -> None:
        """Store the connector configuration.

        Args:
            project_owner: First path component of the project.
            project_name: Second path component of the project.
            batch_size: Maximum number of documents per yielded batch.
            state_filter: Passed as ``state`` to the merge request and
                issue list calls.
            include_mrs: Whether merge requests are indexed.
            include_issues: Whether issues are indexed.
        """
        # No trailing commas here: ``self.x = value,`` stores a 1-tuple,
        # which made ``include_mrs=False`` truthy and sent a tuple as the
        # state filter.
        self.project_owner = project_owner
        self.project_name = project_name
        self.batch_size = batch_size
        self.state_filter = state_filter
        self.include_mrs = include_mrs
        self.include_issues = include_issues
        self.gitlab_client: gitlab.Gitlab | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Create the GitLab client from the stored credentials.

        Args:
            credentials: Must contain ``"gitlab_url"`` and
                ``"gitlab_access_token"``.

        Returns:
            Always ``None``.

        Raises:
            KeyError: If one of the two keys is missing.
        """
        self.gitlab_client = gitlab.Gitlab(
            credentials["gitlab_url"],
            private_token=credentials["gitlab_access_token"],
        )
        return None

    def _batch_documents(
        self,
        gitlab_objs: Any,
        convert: Any,
        start: datetime | None,
        end: datetime | None,
    ) -> GenerateDocumentsOutput:
        """Convert objects updated within ``[start, end]`` and yield them in batches.

        ``gitlab_objs`` must be ordered by ``updated_at`` descending:
        iteration stops at the first object older than ``start``. Only
        non-empty batches are yielded.
        """
        doc_batch: list[Document] = []
        for obj in gitlab_objs:
            # Replace the raw string with a UTC-aware datetime; the convert
            # functions call ``.replace(tzinfo=...)`` on this attribute.
            obj.updated_at = datetime.strptime(
                obj.updated_at, self._TIMESTAMP_FORMAT
            ).replace(tzinfo=timezone.utc)
            if start is not None and obj.updated_at < start:
                # Everything after this object is older still.
                break
            if end is not None and obj.updated_at > end:
                continue
            doc_batch.append(convert(obj))
            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []
        if doc_batch:
            yield doc_batch

    def _fetch_from_gitlab(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        """Yield document batches for the configured project.

        Args:
            start: Timezone-aware lower bound on ``updated_at``, or ``None``.
            end: Timezone-aware upper bound on ``updated_at``, or ``None``.

        Raises:
            ConnectorMissingCredentialError: If ``load_credentials`` has not
                been called yet.
        """
        if self.gitlab_client is None:
            raise ConnectorMissingCredentialError("Gitlab")

        project = self.gitlab_client.projects.get(
            f"{self.project_owner}/{self.project_name}"
        )

        if self.include_mrs:
            # iterator=True pages through all results lazily instead of
            # returning only the first page.
            merge_requests = project.mergerequests.list(
                state=self.state_filter,
                order_by="updated_at",
                sort="desc",
                iterator=True,
            )
            yield from self._batch_documents(
                merge_requests, _convert_merge_request_to_document, start, end
            )

        if self.include_issues:
            # Same ordering as for merge requests: the early stop in
            # _batch_documents relies on newest-first results.
            issues = project.issues.list(
                state=self.state_filter,
                order_by="updated_at",
                sort="desc",
                iterator=True,
            )
            yield from self._batch_documents(
                issues, _convert_issue_to_document, start, end
            )

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield batches for everything in the project, without a time window."""
        return self._fetch_from_gitlab()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield batches for objects updated between ``start`` and ``end``.

        Both bounds are seconds since the Unix epoch and are converted to
        UTC-aware datetimes for comparison with the parsed timestamps.
        """
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
        return self._fetch_from_gitlab(start_datetime, end_datetime)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: builds a connector from environment variables and
    # prints the first batch of documents.
    import os

    connector = GitlabConnector(
        project_owner=os.environ["PROJECT_OWNER"],
        project_name=os.environ["PROJECT_NAME"],
        batch_size=10,
        state_filter="all",
        include_mrs=True,
        include_issues=True,
    )

    # The GitLab URL is part of the credentials, not a constructor argument.
    # The token key must be "gitlab_access_token" (it was misspelled
    # "github_access_token"), because load_credentials reads exactly that key.
    connector.load_credentials(
        {
            "gitlab_access_token": os.environ["GITLAB_ACCESS_TOKEN"],
            "gitlab_url": os.environ["GITLAB_URL"],
        }
    )
    document_batches = connector.load_from_state()
    # Default of None avoids a StopIteration traceback when nothing is found.
    print(next(document_batches, None))
|
||||
|
||||
|
||||
|
@ -36,6 +36,7 @@ psycopg2-binary==2.9.9
|
||||
pycryptodome==3.19.1
|
||||
pydantic==1.10.7
|
||||
PyGithub==1.58.2
|
||||
python-gitlab==3.9.0
|
||||
pypdf==3.17.0
|
||||
pytest-playwright==0.3.2
|
||||
python-dotenv==1.0.0
|
||||
|
BIN
web/public/Gitlab.png
Normal file
BIN
web/public/Gitlab.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 19 KiB |
253
web/src/app/admin/connectors/gitlab/page.tsx
Normal file
253
web/src/app/admin/connectors/gitlab/page.tsx
Normal file
@ -0,0 +1,253 @@
|
||||
"use client";
|
||||
|
||||
import * as Yup from "yup";
|
||||
import { GitlabIcon, TrashIcon } from "@/components/icons/icons";
|
||||
import { TextFormField } from "@/components/admin/connectors/Field";
|
||||
import { HealthCheckBanner } from "@/components/health/healthcheck";
|
||||
import useSWR, { useSWRConfig } from "swr";
|
||||
import { fetcher } from "@/lib/fetcher";
|
||||
import {
|
||||
GitlabConfig,
|
||||
GitlabCredentialJson,
|
||||
Credential,
|
||||
ConnectorIndexingStatus,
|
||||
} from "@/lib/types";
|
||||
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
|
||||
import { LoadingAnimation } from "@/components/Loading";
|
||||
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
|
||||
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
|
||||
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
|
||||
import { usePublicCredentials } from "@/lib/hooks";
|
||||
import { Card, Divider, Text, Title } from "@tremor/react";
|
||||
import { AdminPageTitle } from "@/components/admin/Title";
|
||||
|
||||
/**
 * Body of the GitLab connector admin page.
 *
 * Loads the indexing status of all connectors and the stored credentials,
 * then renders two steps: (1) entering or deleting the GitLab access token
 * and (2) a table of the existing GitLab connectors plus a form to connect
 * another project.
 */
const Main = () => {
  // SWR key shared by the initial fetch and every later revalidation.
  const indexingStatusUrl = "/api/manage/admin/connector/indexing-status";

  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(indexingStatusUrl, fetcher);

  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  const gitlabConnectorIndexingStatuses: ConnectorIndexingStatus<
    GitlabConfig,
    GitlabCredentialJson
  >[] = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "gitlab"
  );
  // The first credential that carries a GitLab access token is treated as
  // the GitLab credential.
  const gitlabCredential: Credential<GitlabCredentialJson> | undefined =
    credentialsData.find(
      (credential) => credential.credential_json?.gitlab_access_token
    );

  return (
    <>
      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your access token
      </Title>
      {gitlabCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing Access Token: </p>
            {/* NOTE(review): the token is rendered as-is; confirm the API
                returns it masked. */}
            <p className="ml-1 italic my-auto">
              {gitlabCredential.credential_json.gitlab_access_token}
            </p>{" "}
            <button
              className="ml-1 hover:bg-hover rounded p-1"
              onClick={async () => {
                await adminDeleteCredential(gitlabCredential.id);
                refreshCredentials();
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <Text>
            If you don&apos;t have an access token, read the guide{" "}
            <a
              className="text-blue-500"
              href="https://docs.danswer.dev/connectors/gitlab"
              target="_blank"
              rel="noopener noreferrer"
            >
              here
            </a>{" "}
            on how to get one from Gitlab.
          </Text>
          <Card className="mt-4">
            <CredentialForm<GitlabCredentialJson>
              formBody={
                <>
                  <Text>
                    If you are using GitLab Cloud, keep the default value below
                  </Text>
                  <TextFormField
                    name="gitlab_url"
                    label="GitLab URL:"
                    type="text"
                    placeholder="https://gitlab.com"
                  />

                  <TextFormField
                    name="gitlab_access_token"
                    label="Access Token:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                gitlab_url: Yup.string().default("https://gitlab.com"),
                gitlab_access_token: Yup.string().required(
                  "Please enter the access token for Gitlab"
                ),
              })}
              initialValues={{
                gitlab_access_token: "",
                gitlab_url: "https://gitlab.com",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  refreshCredentials();
                }
              }}
            />
          </Card>
        </>
      )}

      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 2: Which projects do you want to make searchable?
      </Title>

      {gitlabConnectorIndexingStatuses.length > 0 && (
        <>
          <Text className="mb-2">
            We pull the latest Merge Requests and Issues from each project
            listed below every <b>10</b> minutes.
          </Text>
          <div className="mb-2">
            <ConnectorsTable<GitlabConfig, GitlabCredentialJson>
              connectorIndexingStatuses={gitlabConnectorIndexingStatuses}
              liveCredential={gitlabCredential}
              getCredential={(credential) =>
                credential.credential_json.gitlab_access_token
              }
              onCredentialLink={async (connectorId) => {
                if (gitlabCredential) {
                  await linkCredential(connectorId, gitlabCredential.id);
                  mutate(indexingStatusUrl);
                }
              }}
              specialColumns={[
                {
                  header: "Project",
                  key: "project",
                  getValue: (ccPairStatus) => {
                    const connectorConfig =
                      ccPairStatus.connector.connector_specific_config;
                    return `${connectorConfig.project_owner}/${connectorConfig.project_name}`;
                  },
                },
              ]}
              onUpdate={() => mutate(indexingStatusUrl)}
            />
          </div>
          <Divider />
        </>
      )}

      {gitlabCredential ? (
        <Card className="mt-4">
          <h2 className="font-bold mb-3">Connect to a New Project</h2>
          <ConnectorForm<GitlabConfig>
            nameBuilder={(values) =>
              `GitlabConnector-${values.project_owner}/${values.project_name}`
            }
            ccPairNameBuilder={(values) =>
              `${values.project_owner}/${values.project_name}`
            }
            source="gitlab"
            inputType="poll"
            formBody={
              <>
                <TextFormField name="project_owner" label="Project Owner:" />
                <TextFormField name="project_name" label="Project Name:" />
              </>
            }
            validationSchema={Yup.object().shape({
              project_owner: Yup.string().required(
                "Please enter the owner of the project to index e.g. danswer-ai"
              ),
              project_name: Yup.string().required(
                "Please enter the name of the project to index e.g. danswer "
              ),
              include_mrs: Yup.boolean().required(),
              include_issues: Yup.boolean().required(),
            })}
            initialValues={{
              project_owner: "",
              project_name: "",
              include_mrs: true,
              include_issues: true,
            }}
            refreshFreq={10 * 60} // 10 minutes
            credentialId={gitlabCredential.id}
          />
        </Card>
      ) : (
        <Text>
          Please provide your access token in Step 1 first! Once done with that,
          you can then specify which Gitlab projects you want to make
          searchable.
        </Text>
      )}
    </>
  );
};
|
||||
|
||||
/**
 * GitLab connector admin page: health banner, page title and the
 * credential/connector management body.
 */
export default function Page() {
  const titleIcon = <GitlabIcon size={32} />;

  return (
    <div className="container mx-auto">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>

      <AdminPageTitle icon={titleIcon} title="Gitlab MRs + Issues" />

      <Main />
    </div>
  );
}
|
@ -3,6 +3,7 @@ import {
|
||||
ConfluenceConfig,
|
||||
Connector,
|
||||
GithubConfig,
|
||||
GitlabConfig,
|
||||
GoogleDriveConfig,
|
||||
JiraConfig,
|
||||
SlackConfig,
|
||||
@ -38,7 +39,13 @@ export const ConnectorTitle = ({
|
||||
"Repo",
|
||||
`${typedConnector.connector_specific_config.repo_owner}/${typedConnector.connector_specific_config.repo_name}`
|
||||
);
|
||||
} else if (connector.source === "confluence") {
|
||||
} else if (connector.source === "gitlab") {
|
||||
const typedConnector = connector as Connector<GitlabConfig>;
|
||||
additionalMetadata.set(
|
||||
"Repo",
|
||||
`${typedConnector.connector_specific_config.project_owner}/${typedConnector.connector_specific_config.project_name}`
|
||||
);
|
||||
} else if (connector.source === "confluence") {
|
||||
const typedConnector = connector as Connector<ConfluenceConfig>;
|
||||
additionalMetadata.set(
|
||||
"Wiki URL",
|
||||
|
@ -329,7 +329,19 @@ export const SlackIcon = ({
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * GitLab logo rendered from `/Gitlab.png` inside a square wrapper whose
 * side length is `size` pixels.
 */
export const GitlabIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  const side = `${size}px`;
  const wrapperClassName = `w-[${side}] h-[${side}] ` + className;

  return (
    <div style={{ width: side, height: side }} className={wrapperClassName}>
      <Image src="/Gitlab.png" alt="Logo" width="96" height="96" />
    </div>
  );
};
|
||||
export const GithubIcon = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
@ -344,6 +356,7 @@ export const GithubIcon = ({
|
||||
);
|
||||
};
|
||||
|
||||
|
||||
export const GoogleDriveIcon = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
|
@ -4,6 +4,7 @@ import {
|
||||
Document360Icon,
|
||||
FileIcon,
|
||||
GithubIcon,
|
||||
GitlabIcon,
|
||||
GlobeIcon,
|
||||
GongIcon,
|
||||
GoogleDriveIcon,
|
||||
@ -60,6 +61,11 @@ const SOURCE_METADATA_MAP: SourceMap = {
|
||||
displayName: "Github",
|
||||
category: SourceCategory.AppConnection,
|
||||
},
|
||||
gitlab :{
|
||||
icon:GitlabIcon,
|
||||
displayName:"Gitlab",
|
||||
category:SourceCategory.AppConnection,
|
||||
},
|
||||
confluence: {
|
||||
icon: ConfluenceIcon,
|
||||
displayName: "Confluence",
|
||||
|
@ -12,6 +12,7 @@ export interface User {
|
||||
export type ValidSources =
|
||||
| "web"
|
||||
| "github"
|
||||
| "gitlab"
|
||||
| "slack"
|
||||
| "google_drive"
|
||||
| "bookstack"
|
||||
@ -77,6 +78,14 @@ export interface GithubConfig {
|
||||
include_issues: boolean;
|
||||
}
|
||||
|
||||
/** Connector-specific configuration of a GitLab connector. */
export interface GitlabConfig {
  /** Owner part of the project, shown as `project_owner/project_name`. */
  project_owner: string;
  /** Name part of the project, shown as `project_owner/project_name`. */
  project_name: string;
  /** Whether merge requests are indexed. */
  include_mrs: boolean;
  /** Whether issues are indexed. */
  include_issues: boolean;
}
|
||||
|
||||
|
||||
export interface GoogleDriveConfig {
|
||||
folder_paths?: string[];
|
||||
include_shared?: boolean;
|
||||
@ -189,6 +198,11 @@ export interface GithubCredentialJson {
|
||||
github_access_token: string;
|
||||
}
|
||||
|
||||
/** Credential payload stored for the GitLab connector. */
export interface GitlabCredentialJson {
  /** Base URL of the GitLab instance, e.g. `https://gitlab.com`. */
  gitlab_url: string;
  /** Access token used to authenticate against GitLab. */
  gitlab_access_token: string;
}
|
||||
|
||||
export interface BookstackCredentialJson {
|
||||
bookstack_base_url: string;
|
||||
bookstack_api_token_id: string;
|
||||
|
Loading…
x
Reference in New Issue
Block a user