Fix #3764: Dynamically handle default branch in GitLab connector

This commit is contained in:
ThomaciousD 2025-01-24 17:09:06 +01:00 committed by Chris Weaver
parent 1430a18d44
commit f7543c6285

View File

@@ -88,29 +88,35 @@ def _convert_issue_to_document(issue: Any) -> Document:
def _convert_code_to_document(
    project: Project, file: Any, url: str, projectName: str, projectOwner: str
) -> Document:
    """Build a ``Document`` for one repository file on the default branch.

    Args:
        project: GitLab project object; its ``default_branch`` attribute and
            ``files.get`` accessor are used to fetch the file.
        file: Mapping describing the file; the ``"path"``, ``"id"`` and
            ``"name"`` keys are read.
        url: Base URL of the GitLab instance, used as the link prefix.
        projectName: Project name segment of the file link.
        projectOwner: Owner/namespace segment of the file link.

    Returns:
        A ``Document`` with a single text section holding the file content
        and a link to the file's blob page on the default branch.
    """
    # Dynamically get the default branch from the project object instead of
    # assuming "master"; the same value is used for the fetch and the link.
    default_branch = project.default_branch

    # Fetch the file content using the correct branch
    file_content_obj = project.files.get(file_path=file["path"], ref=default_branch)

    # Decode the payload once and reuse it for both text-decoding attempts.
    # NOTE(review): presumably ``decode()`` yields the raw bytes of the file
    # -- confirm against the python-gitlab version in use.
    raw_content = file_content_obj.decode()
    try:
        file_content = raw_content.decode("utf-8")
    except UnicodeDecodeError:
        # latin-1 maps every byte to a code point, so this fallback cannot
        # itself raise UnicodeDecodeError.
        file_content = raw_content.decode("latin-1")

    # Construct the file URL dynamically using the default branch
    file_url = (
        f"{url}/{projectOwner}/{projectName}/-/blob/{default_branch}/{file['path']}"
    )

    # Create and return a Document object
    doc = Document(
        id=file["id"],
        sections=[TextSection(link=file_url, text=file_content)],
        source=DocumentSource.GITLAB,
        semantic_identifier=file["name"],
        # Use the current time as updated_at. Ask for UTC directly:
        # ``datetime.now().replace(tzinfo=timezone.utc)`` would relabel the
        # host's local wall-clock time as UTC and be off by the UTC offset.
        doc_updated_at=datetime.now(timezone.utc),
        primary_owners=[],  # Add owners if needed
        metadata={"type": "CodeFile"},
    )
    return doc
def _should_exclude(path: str) -> bool:
    """Return True when *path* matches at least one entry of ``exclude_patterns``."""
    for glob in exclude_patterns:
        # Stop at the first matching pattern, as any() would.
        if fnmatch.fnmatch(path, glob):
            return True
    return False