mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-10 21:26:01 +02:00
Fix #3764: Dynamically handle default branch in GitLab connector
This commit is contained in:
committed by
Chris Weaver
parent
1430a18d44
commit
f7543c6285
@@ -88,29 +88,35 @@ def _convert_issue_to_document(issue: Any) -> Document:
|
|||||||
def _convert_code_to_document(
|
def _convert_code_to_document(
|
||||||
project: Project, file: Any, url: str, projectName: str, projectOwner: str
|
project: Project, file: Any, url: str, projectName: str, projectOwner: str
|
||||||
) -> Document:
|
) -> Document:
|
||||||
|
# Dynamically get the default branch from the project object
|
||||||
|
default_branch = project.default_branch
|
||||||
|
|
||||||
|
# Fetch the file content using the correct branch
|
||||||
file_content_obj = project.files.get(
|
file_content_obj = project.files.get(
|
||||||
file_path=file["path"], ref="master"
|
file_path=file["path"], ref=default_branch # Use the default branch
|
||||||
) # Replace 'master' with your branch name if needed
|
)
|
||||||
try:
|
try:
|
||||||
file_content = file_content_obj.decode().decode("utf-8")
|
file_content = file_content_obj.decode().decode("utf-8")
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
file_content = file_content_obj.decode().decode("latin-1")
|
file_content = file_content_obj.decode().decode("latin-1")
|
||||||
|
|
||||||
file_url = f"{url}/{projectOwner}/{projectName}/-/blob/master/{file['path']}" # Construct the file URL
|
# Construct the file URL dynamically using the default branch
|
||||||
|
file_url = f"{url}/{projectOwner}/{projectName}/-/blob/{default_branch}/{file['path']}"
|
||||||
|
|
||||||
|
# Create and return a Document object
|
||||||
doc = Document(
|
doc = Document(
|
||||||
id=file["id"],
|
id=file["id"],
|
||||||
sections=[TextSection(link=file_url, text=file_content)],
|
sections=[TextSection(link=file_url, text=file_content)],
|
||||||
source=DocumentSource.GITLAB,
|
source=DocumentSource.GITLAB,
|
||||||
semantic_identifier=file["name"],
|
semantic_identifier=file["name"],
|
||||||
doc_updated_at=datetime.now().replace(
|
doc_updated_at=datetime.now().replace(tzinfo=timezone.utc),
|
||||||
tzinfo=timezone.utc
|
primary_owners=[], # Add owners if needed
|
||||||
), # Use current time as updated_at
|
|
||||||
primary_owners=[], # Fill this as needed
|
|
||||||
metadata={"type": "CodeFile"},
|
metadata={"type": "CodeFile"},
|
||||||
)
|
)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _should_exclude(path: str) -> bool:
|
def _should_exclude(path: str) -> bool:
|
||||||
"""Check if a path matches any of the exclude patterns."""
|
"""Check if a path matches any of the exclude patterns."""
|
||||||
return any(fnmatch.fnmatch(path, pattern) for pattern in exclude_patterns)
|
return any(fnmatch.fnmatch(path, pattern) for pattern in exclude_patterns)
|
||||||
|
Reference in New Issue
Block a user