mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-28 21:05:17 +02:00
Fix GitLabs CI (#965)
This commit is contained in:
@@ -1,18 +1,20 @@
|
|||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
|
from collections.abc import Iterable
|
||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import timezone
|
from datetime import timezone
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import gitlab
|
import gitlab
|
||||||
|
|
||||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
from danswer.connectors.models import BasicExpertInfo, ConnectorMissingCredentialError
|
from danswer.connectors.models import BasicExpertInfo
|
||||||
|
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||||
from danswer.connectors.models import Document
|
from danswer.connectors.models import Document
|
||||||
from danswer.connectors.models import Section
|
from danswer.connectors.models import Section
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
@@ -22,22 +24,21 @@ logger = setup_logger()
|
|||||||
|
|
||||||
|
|
||||||
def _batch_gitlab_objects(
|
def _batch_gitlab_objects(
|
||||||
git_objs: list[Any],
|
git_objs: Iterable[Any], batch_size: int
|
||||||
batch_size: int
|
|
||||||
) -> Iterator[list[Any]]:
|
) -> Iterator[list[Any]]:
|
||||||
it = iter(git_objs)
|
it = iter(git_objs)
|
||||||
while True:
|
while True:
|
||||||
batch = list(itertools.islice(it, batch_size[0]))
|
batch = list(itertools.islice(it, batch_size))
|
||||||
if not batch:
|
if not batch:
|
||||||
break
|
break
|
||||||
yield batch
|
yield batch
|
||||||
|
|
||||||
|
|
||||||
def get_author(author: Any) -> BasicExpertInfo:
|
def get_author(author: Any) -> BasicExpertInfo:
|
||||||
return BasicExpertInfo(
|
return BasicExpertInfo(
|
||||||
display_name=author.get("name"),
|
display_name=author.get("name"),
|
||||||
first_name=author.get("name").split(" ")[0],
|
first_name=author.get("name").split(" ")[0],
|
||||||
last_name=author.get("name").split(" ")[1]
|
last_name=author.get("name").split(" ")[1],
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -52,10 +53,7 @@ def _convert_merge_request_to_document(mr: Any) -> Document:
|
|||||||
# due to local time discrepancies with UTC
|
# due to local time discrepancies with UTC
|
||||||
doc_updated_at=mr.updated_at.replace(tzinfo=timezone.utc),
|
doc_updated_at=mr.updated_at.replace(tzinfo=timezone.utc),
|
||||||
primary_owners=[get_author(mr.author)],
|
primary_owners=[get_author(mr.author)],
|
||||||
metadata={
|
metadata={"state": mr.state, "type": "MergeRequest"},
|
||||||
"state": mr.state,
|
|
||||||
"type": "MergeRequest"
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -70,14 +68,13 @@ def _convert_issue_to_document(issue: Any) -> Document:
|
|||||||
# due to local time discrepancies with UTC
|
# due to local time discrepancies with UTC
|
||||||
doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),
|
doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),
|
||||||
primary_owners=[get_author(issue.author)],
|
primary_owners=[get_author(issue.author)],
|
||||||
metadata={
|
metadata={"state": issue.state, "type": issue.type | "Issue"},
|
||||||
"state": issue.state,
|
|
||||||
"type": issue.type | "Issue"
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GitlabConnector(LoadConnector, PollConnector):
|
class GitlabConnector(LoadConnector, PollConnector):
|
||||||
def __init__(self,
|
def __init__(
|
||||||
|
self,
|
||||||
project_owner: str,
|
project_owner: str,
|
||||||
project_name: str,
|
project_name: str,
|
||||||
batch_size: int = INDEX_BATCH_SIZE,
|
batch_size: int = INDEX_BATCH_SIZE,
|
||||||
@@ -85,26 +82,28 @@ class GitlabConnector(LoadConnector, PollConnector):
|
|||||||
include_mrs: bool = True,
|
include_mrs: bool = True,
|
||||||
include_issues: bool = True,
|
include_issues: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.project_owner=project_owner,
|
self.project_owner = project_owner
|
||||||
self.project_name=project_name,
|
self.project_name = project_name
|
||||||
self.batch_size=batch_size,
|
self.batch_size = batch_size
|
||||||
self.state_filter=state_filter,
|
self.state_filter = state_filter
|
||||||
self.include_mrs=include_mrs,
|
self.include_mrs = include_mrs
|
||||||
self.include_issues=include_issues,
|
self.include_issues = include_issues
|
||||||
self.gitlab_client: gitlab.Gitlab | None = None
|
self.gitlab_client: gitlab.Gitlab | None = None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||||
self.gitlab_client = gitlab.Gitlab(credentials["gitlab_url"], private_token=credentials['gitlab_access_token'])
|
self.gitlab_client = gitlab.Gitlab(
|
||||||
|
credentials["gitlab_url"], private_token=credentials["gitlab_access_token"]
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _fetch_from_gitlab(
|
||||||
|
self, start: datetime | None = None, end: datetime | None = None
|
||||||
def _fetch_from_gitlab(self, start: datetime | None = None, end: datetime | None = None) -> GenerateDocumentsOutput:
|
) -> GenerateDocumentsOutput:
|
||||||
if self.gitlab_client is None:
|
if self.gitlab_client is None:
|
||||||
raise ConnectorMissingCredentialError("Gitlab")
|
raise ConnectorMissingCredentialError("Gitlab")
|
||||||
project = self.gitlab_client.projects.get(f"{self.project_owner[0]}/{self.project_name[0]}")
|
project = self.gitlab_client.projects.get(
|
||||||
|
f"{self.project_owner[0]}/{self.project_name[0]}"
|
||||||
|
)
|
||||||
|
|
||||||
if self.include_mrs:
|
if self.include_mrs:
|
||||||
merge_requests = project.mergerequests.list(
|
merge_requests = project.mergerequests.list(
|
||||||
@@ -112,9 +111,11 @@ class GitlabConnector(LoadConnector, PollConnector):
|
|||||||
)
|
)
|
||||||
|
|
||||||
for mr_batch in _batch_gitlab_objects(merge_requests, self.batch_size):
|
for mr_batch in _batch_gitlab_objects(merge_requests, self.batch_size):
|
||||||
doc_batch =[]
|
doc_batch: list[Document] = []
|
||||||
for mr in mr_batch:
|
for mr in mr_batch:
|
||||||
mr.updated_at = datetime.strptime(mr.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ")
|
mr.updated_at = datetime.strptime(
|
||||||
|
mr.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ"
|
||||||
|
)
|
||||||
if start is not None and mr.updated_at < start:
|
if start is not None and mr.updated_at < start:
|
||||||
yield doc_batch
|
yield doc_batch
|
||||||
return
|
return
|
||||||
@@ -124,14 +125,14 @@ class GitlabConnector(LoadConnector, PollConnector):
|
|||||||
yield doc_batch
|
yield doc_batch
|
||||||
|
|
||||||
if self.include_issues:
|
if self.include_issues:
|
||||||
issues = project.issues.list(
|
issues = project.issues.list(state=self.state_filter)
|
||||||
state=self.state_filter
|
|
||||||
)
|
|
||||||
|
|
||||||
for issue_batch in _batch_gitlab_objects(issues, self.batch_size):
|
for issue_batch in _batch_gitlab_objects(issues, self.batch_size):
|
||||||
doc_batch = []
|
doc_batch = []
|
||||||
for issue in issue_batch:
|
for issue in issue_batch:
|
||||||
issue.updated_at = datetime.strptime(issue.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ")
|
issue.updated_at = datetime.strptime(
|
||||||
|
issue.updated_at, "%Y-%m-%dT%H:%M:%S.%fZ"
|
||||||
|
)
|
||||||
if start is not None and issue.updated_at < start:
|
if start is not None and issue.updated_at < start:
|
||||||
yield doc_batch
|
yield doc_batch
|
||||||
return
|
return
|
||||||
@@ -146,19 +147,17 @@ class GitlabConnector(LoadConnector, PollConnector):
|
|||||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||||
return self._fetch_from_gitlab()
|
return self._fetch_from_gitlab()
|
||||||
|
|
||||||
def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput:
|
def poll_source(
|
||||||
|
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
||||||
|
) -> GenerateDocumentsOutput:
|
||||||
start_datetime = datetime.utcfromtimestamp(start)
|
start_datetime = datetime.utcfromtimestamp(start)
|
||||||
end_datetime = datetime.utcfromtimestamp(end)
|
end_datetime = datetime.utcfromtimestamp(end)
|
||||||
return self._fetch_from_gitlab(start_datetime, end_datetime)
|
return self._fetch_from_gitlab(start_datetime, end_datetime)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import os
|
import os
|
||||||
|
|
||||||
connector = GitlabConnector(
|
connector = GitlabConnector(
|
||||||
# gitlab_url="https://gitlab.com/api/v4",
|
# gitlab_url="https://gitlab.com/api/v4",
|
||||||
project_owner=os.environ["PROJECT_OWNER"],
|
project_owner=os.environ["PROJECT_OWNER"],
|
||||||
@@ -172,11 +171,8 @@ if __name__ == "__main__":
|
|||||||
connector.load_credentials(
|
connector.load_credentials(
|
||||||
{
|
{
|
||||||
"github_access_token": os.environ["GITLAB_ACCESS_TOKEN"],
|
"github_access_token": os.environ["GITLAB_ACCESS_TOKEN"],
|
||||||
"gitlab_url":os.environ["GITLAB_URL"]
|
"gitlab_url": os.environ["GITLAB_URL"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
document_batches = connector.load_from_state()
|
document_batches = connector.load_from_state()
|
||||||
print(next(document_batches))
|
print(next(document_batches))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -108,7 +108,8 @@ const Main = () => {
|
|||||||
formBody={
|
formBody={
|
||||||
<>
|
<>
|
||||||
<Text>
|
<Text>
|
||||||
If you are using GitLab Cloud, keep the default value below</Text>
|
If you are using GitLab Cloud, keep the default value below
|
||||||
|
</Text>
|
||||||
<TextFormField
|
<TextFormField
|
||||||
name="gitlab_url"
|
name="gitlab_url"
|
||||||
label="GitLab URL:"
|
label="GitLab URL:"
|
||||||
@@ -131,7 +132,7 @@ const Main = () => {
|
|||||||
})}
|
})}
|
||||||
initialValues={{
|
initialValues={{
|
||||||
gitlab_access_token: "",
|
gitlab_access_token: "",
|
||||||
gitlab_url: "https://gitlab.com"
|
gitlab_url: "https://gitlab.com",
|
||||||
}}
|
}}
|
||||||
onSubmit={(isSuccess) => {
|
onSubmit={(isSuccess) => {
|
||||||
if (isSuccess) {
|
if (isSuccess) {
|
||||||
|
@@ -356,7 +356,6 @@ export const GithubIcon = ({
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
export const GoogleDriveIcon = ({
|
export const GoogleDriveIcon = ({
|
||||||
size = 16,
|
size = 16,
|
||||||
className = defaultTailwindCSS,
|
className = defaultTailwindCSS,
|
||||||
|
@@ -85,7 +85,6 @@ export interface GitlabConfig {
|
|||||||
include_issues: boolean;
|
include_issues: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
export interface GoogleDriveConfig {
|
export interface GoogleDriveConfig {
|
||||||
folder_paths?: string[];
|
folder_paths?: string[];
|
||||||
include_shared?: boolean;
|
include_shared?: boolean;
|
||||||
@@ -199,7 +198,7 @@ export interface GithubCredentialJson {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export interface GitlabCredentialJson {
|
export interface GitlabCredentialJson {
|
||||||
gitlab_url:string,
|
gitlab_url: string;
|
||||||
gitlab_access_token: string;
|
gitlab_access_token: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user