mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-25 15:30:59 +02:00
Comma separated list for Github repos (#4199)
This commit is contained in:
parent
a7a374dc81
commit
7d6db8d500
@ -0,0 +1,125 @@
|
|||||||
|
"""Update GitHub connector repo_name to repositories
|
||||||
|
|
||||||
|
Revision ID: 3934b1bc7b62
|
||||||
|
Revises: b7c2b63c4a03
|
||||||
|
Create Date: 2025-03-05 10:50:30.516962
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = "3934b1bc7b62"
|
||||||
|
down_revision = "b7c2b63c4a03"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
logger = logging.getLogger("alembic.runtime.migration")
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Rename ``repo_name`` to ``repositories`` in GitHub connector configs.

    Reads every ``connector`` row whose ``source`` is ``'GITHUB'``. For each
    config that contains a ``repo_name`` key, the config is written back with
    that value stored under ``repositories`` instead. Rows with an empty
    config are skipped with a warning; rows without ``repo_name`` are skipped
    silently.

    Processing is best-effort per connector: an exception raised while
    handling one row is logged and the loop continues with the next row.
    A summary of how many rows were rewritten is logged at the end.
    """
    conn = op.get_bind()

    github_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'GITHUB'
            """
        )
    ).fetchall()

    updated_count = 0
    for connector_id, config in github_connectors:
        try:
            if not config:
                logger.warning(f"Connector {connector_id} has no config, skipping")
                continue

            # The config may arrive as a JSON string rather than a decoded
            # object; normalise it before inspecting keys.
            if isinstance(config, str):
                config = json.loads(config)

            if "repo_name" not in config:
                continue

            # Work on a copy so the fetched row value is not mutated.
            new_config = dict(config)
            new_config["repositories"] = new_config.pop("repo_name")

            conn.execute(
                sa.text(
                    """
                    UPDATE connector
                    SET connector_specific_config = :new_config
                    WHERE id = :connector_id
                    """
                ),
                {"connector_id": connector_id, "new_config": json.dumps(new_config)},
            )
            updated_count += 1
        except Exception as e:
            # Deliberately best-effort: one bad row must not stop the others.
            logger.error(f"Error updating connector {connector_id}: {str(e)}")

    # Report the outcome so silently skipped or failed rows are noticeable.
    logger.info(
        f"Updated {updated_count} of {len(github_connectors)} GitHub connectors "
        "from repo_name to repositories"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Rename ``repositories`` back to ``repo_name`` in GitHub connector configs.

    Reads every ``connector`` row whose ``source`` is ``'GITHUB'``. For each
    config that contains a ``repositories`` key, the config is written back
    with that value stored under ``repo_name`` instead. Rows with an empty
    config or without ``repositories`` are skipped.

    A ``repositories`` value containing a comma is still copied verbatim into
    ``repo_name``, but a warning is logged because a single ``repo_name``
    string cannot faithfully represent several repositories.

    Processing is best-effort per connector: an exception raised while
    handling one row is logged and the loop continues with the next row.
    A summary of how many rows were reverted is logged at the end.
    """
    conn = op.get_bind()

    logger.debug(
        "Starting rollback of GitHub connectors from repositories to repo_name"
    )

    github_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'GITHUB'
            """
        )
    ).fetchall()

    logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")

    reverted_count = 0
    for connector_id, config in github_connectors:
        try:
            if not config:
                continue

            # The config may arrive as a JSON string rather than a decoded
            # object; normalise it before inspecting keys.
            if isinstance(config, str):
                config = json.loads(config)

            if "repositories" not in config:
                continue

            # Work on a copy so the fetched row value is not mutated.
            new_config = dict(config)
            repositories_value = new_config.pop("repositories")

            # The value is preserved as-is, but flag the lossy case so an
            # operator can fix the connector by hand after the rollback.
            if isinstance(repositories_value, str) and "," in repositories_value:
                logger.warning(
                    f"Connector {connector_id} lists multiple repositories "
                    f"('{repositories_value}'); repo_name will hold them as a "
                    "single string and may need manual correction"
                )

            new_config["repo_name"] = repositories_value

            conn.execute(
                sa.text(
                    """
                    UPDATE connector
                    SET connector_specific_config = :new_config
                    WHERE id = :connector_id
                    """
                ),
                {"new_config": json.dumps(new_config), "connector_id": connector_id},
            )
            reverted_count += 1
        except Exception as e:
            # Deliberately best-effort: one bad row must not stop the others.
            logger.error(f"Error reverting connector {connector_id}: {str(e)}")

    # Report the outcome so silently skipped or failed rows are noticeable.
    logger.info(
        f"Reverted {reverted_count} of {len(github_connectors)} GitHub connectors "
        "from repositories to repo_name"
    )
|
@ -124,14 +124,14 @@ class GithubConnector(LoadConnector, PollConnector):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
repo_owner: str,
|
repo_owner: str,
|
||||||
repo_name: str | None = None,
|
repositories: str | None = None,
|
||||||
batch_size: int = INDEX_BATCH_SIZE,
|
batch_size: int = INDEX_BATCH_SIZE,
|
||||||
state_filter: str = "all",
|
state_filter: str = "all",
|
||||||
include_prs: bool = True,
|
include_prs: bool = True,
|
||||||
include_issues: bool = False,
|
include_issues: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.repo_owner = repo_owner
|
self.repo_owner = repo_owner
|
||||||
self.repo_name = repo_name
|
self.repositories = repositories
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
self.state_filter = state_filter
|
self.state_filter = state_filter
|
||||||
self.include_prs = include_prs
|
self.include_prs = include_prs
|
||||||
@ -157,11 +157,42 @@ class GithubConnector(LoadConnector, PollConnector):
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
|
return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
|
||||||
except RateLimitExceededException:
|
except RateLimitExceededException:
|
||||||
_sleep_after_rate_limit_exception(github_client)
|
_sleep_after_rate_limit_exception(github_client)
|
||||||
return self._get_github_repo(github_client, attempt_num + 1)
|
return self._get_github_repo(github_client, attempt_num + 1)
|
||||||
|
|
||||||
|
def _get_github_repos(
    self, github_client: Github, attempt_num: int = 0
) -> list[Repository.Repository]:
    """Fetch the repositories named in the comma-separated ``self.repositories``.

    Each name is stripped of surrounding whitespace, empty entries are
    ignored, and the repository ``{self.repo_owner}/{name}`` is looked up
    through ``github_client``. A repository that cannot be fetched is logged
    and skipped, so the returned list may be shorter than the input list.

    Args:
        github_client: Client used for the ``get_repo`` lookups.
        attempt_num: Number of rate-limit retries already performed.

    Returns:
        The repositories that were fetched successfully, in input order.

    Raises:
        RuntimeError: If the rate-limit retry budget is exhausted.
    """
    if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
        raise RuntimeError(
            "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
        )

    try:
        repos = []
        # Split the configured value by comma and strip whitespace
        repo_names = [
            name.strip() for name in (cast(str, self.repositories)).split(",")
        ]

        for repo_name in repo_names:
            if not repo_name:  # Skip empty strings
                continue

            try:
                repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
                repos.append(repo)
            except RateLimitExceededException:
                # RateLimitExceededException is a GithubException subclass;
                # without this clause the generic handler below would swallow
                # it, the repo would be dropped, and the retry path at the
                # bottom of this method would never run.
                raise
            except GithubException as e:
                logger.warning(
                    f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
                )

        return repos
    except RateLimitExceededException:
        # Wait out the rate limit, then restart the whole lookup.
        _sleep_after_rate_limit_exception(github_client)
        return self._get_github_repos(github_client, attempt_num + 1)
|
||||||
|
|
||||||
def _get_all_repos(
|
def _get_all_repos(
|
||||||
self, github_client: Github, attempt_num: int = 0
|
self, github_client: Github, attempt_num: int = 0
|
||||||
) -> list[Repository.Repository]:
|
) -> list[Repository.Repository]:
|
||||||
@ -189,11 +220,17 @@ class GithubConnector(LoadConnector, PollConnector):
|
|||||||
if self.github_client is None:
|
if self.github_client is None:
|
||||||
raise ConnectorMissingCredentialError("GitHub")
|
raise ConnectorMissingCredentialError("GitHub")
|
||||||
|
|
||||||
repos = (
|
repos = []
|
||||||
[self._get_github_repo(self.github_client)]
|
if self.repositories:
|
||||||
if self.repo_name
|
if "," in self.repositories:
|
||||||
else self._get_all_repos(self.github_client)
|
# Multiple repositories specified
|
||||||
)
|
repos = self._get_github_repos(self.github_client)
|
||||||
|
else:
|
||||||
|
# Single repository (backward compatibility)
|
||||||
|
repos = [self._get_github_repo(self.github_client)]
|
||||||
|
else:
|
||||||
|
# All repositories
|
||||||
|
repos = self._get_all_repos(self.github_client)
|
||||||
|
|
||||||
for repo in repos:
|
for repo in repos:
|
||||||
if self.include_prs:
|
if self.include_prs:
|
||||||
@ -268,11 +305,48 @@ class GithubConnector(LoadConnector, PollConnector):
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if self.repo_name:
|
if self.repositories:
|
||||||
test_repo = self.github_client.get_repo(
|
if "," in self.repositories:
|
||||||
f"{self.repo_owner}/{self.repo_name}"
|
# Multiple repositories specified
|
||||||
)
|
repo_names = [name.strip() for name in self.repositories.split(",")]
|
||||||
test_repo.get_contents("")
|
if not repo_names:
|
||||||
|
raise ConnectorValidationError(
|
||||||
|
"Invalid connector settings: No valid repository names provided."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Validate at least one repository exists and is accessible
|
||||||
|
valid_repos = False
|
||||||
|
validation_errors = []
|
||||||
|
|
||||||
|
for repo_name in repo_names:
|
||||||
|
if not repo_name:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
test_repo = self.github_client.get_repo(
|
||||||
|
f"{self.repo_owner}/{repo_name}"
|
||||||
|
)
|
||||||
|
test_repo.get_contents("")
|
||||||
|
valid_repos = True
|
||||||
|
# If at least one repo is valid, we can proceed
|
||||||
|
break
|
||||||
|
except GithubException as e:
|
||||||
|
validation_errors.append(
|
||||||
|
f"Repository '{repo_name}': {e.data.get('message', str(e))}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not valid_repos:
|
||||||
|
error_msg = (
|
||||||
|
"None of the specified repositories could be accessed: "
|
||||||
|
)
|
||||||
|
error_msg += ", ".join(validation_errors)
|
||||||
|
raise ConnectorValidationError(error_msg)
|
||||||
|
else:
|
||||||
|
# Single repository (backward compatibility)
|
||||||
|
test_repo = self.github_client.get_repo(
|
||||||
|
f"{self.repo_owner}/{self.repositories}"
|
||||||
|
)
|
||||||
|
test_repo.get_contents("")
|
||||||
else:
|
else:
|
||||||
# Try to get organization first
|
# Try to get organization first
|
||||||
try:
|
try:
|
||||||
@ -298,10 +372,15 @@ class GithubConnector(LoadConnector, PollConnector):
|
|||||||
"Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
|
"Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
|
||||||
)
|
)
|
||||||
elif e.status == 404:
|
elif e.status == 404:
|
||||||
if self.repo_name:
|
if self.repositories:
|
||||||
raise ConnectorValidationError(
|
if "," in self.repositories:
|
||||||
f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
|
raise ConnectorValidationError(
|
||||||
)
|
f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise ConnectorValidationError(
|
||||||
|
f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise ConnectorValidationError(
|
raise ConnectorValidationError(
|
||||||
f"GitHub user or organization not found: {self.repo_owner}"
|
f"GitHub user or organization not found: {self.repo_owner}"
|
||||||
@ -310,6 +389,7 @@ class GithubConnector(LoadConnector, PollConnector):
|
|||||||
raise ConnectorValidationError(
|
raise ConnectorValidationError(
|
||||||
f"Unexpected GitHub error (status={e.status}): {e.data}"
|
f"Unexpected GitHub error (status={e.status}): {e.data}"
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"Unexpected error during GitHub settings validation: {exc}"
|
f"Unexpected error during GitHub settings validation: {exc}"
|
||||||
@ -321,7 +401,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
connector = GithubConnector(
|
connector = GithubConnector(
|
||||||
repo_owner=os.environ["REPO_OWNER"],
|
repo_owner=os.environ["REPO_OWNER"],
|
||||||
repo_name=os.environ["REPO_NAME"],
|
repositories=os.environ["REPOSITORIES"],
|
||||||
)
|
)
|
||||||
connector.load_credentials(
|
connector.load_credentials(
|
||||||
{"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}
|
{"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}
|
||||||
|
@ -45,7 +45,7 @@ def test_confluence_connector_basic(
|
|||||||
with pytest.raises(StopIteration):
|
with pytest.raises(StopIteration):
|
||||||
next(doc_batch_generator)
|
next(doc_batch_generator)
|
||||||
|
|
||||||
assert len(doc_batch) == 3
|
assert len(doc_batch) == 2
|
||||||
|
|
||||||
page_within_a_page_doc: Document | None = None
|
page_within_a_page_doc: Document | None = None
|
||||||
page_doc: Document | None = None
|
page_doc: Document | None = None
|
||||||
|
@ -40,8 +40,12 @@ export const ConnectorTitle = ({
|
|||||||
const typedConnector = connector as Connector<GithubConfig>;
|
const typedConnector = connector as Connector<GithubConfig>;
|
||||||
additionalMetadata.set(
|
additionalMetadata.set(
|
||||||
"Repo",
|
"Repo",
|
||||||
typedConnector.connector_specific_config.repo_name
|
typedConnector.connector_specific_config.repositories
|
||||||
? `${typedConnector.connector_specific_config.repo_owner}/${typedConnector.connector_specific_config.repo_name}`
|
? `${typedConnector.connector_specific_config.repo_owner}/${
|
||||||
|
typedConnector.connector_specific_config.repositories.includes(",")
|
||||||
|
? "multiple repos"
|
||||||
|
: typedConnector.connector_specific_config.repositories
|
||||||
|
}`
|
||||||
: `${typedConnector.connector_specific_config.repo_owner}/*`
|
: `${typedConnector.connector_specific_config.repo_owner}/*`
|
||||||
);
|
);
|
||||||
} else if (connector.source === "gitlab") {
|
} else if (connector.source === "gitlab") {
|
||||||
|
@ -190,10 +190,12 @@ export const connectorConfigs: Record<
|
|||||||
fields: [
|
fields: [
|
||||||
{
|
{
|
||||||
type: "text",
|
type: "text",
|
||||||
query: "Enter the repository name:",
|
query: "Enter the repository name(s):",
|
||||||
label: "Repository Name",
|
label: "Repository Name(s)",
|
||||||
name: "repo_name",
|
name: "repositories",
|
||||||
optional: false,
|
optional: false,
|
||||||
|
description:
|
||||||
|
"For multiple repositories, enter comma-separated names (e.g., repo1,repo2,repo3)",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
@ -1358,7 +1360,7 @@ export interface WebConfig {
|
|||||||
|
|
||||||
export interface GithubConfig {
|
export interface GithubConfig {
|
||||||
repo_owner: string;
|
repo_owner: string;
|
||||||
repo_name: string;
|
repositories: string; // Comma-separated list of repository names
|
||||||
include_prs: boolean;
|
include_prs: boolean;
|
||||||
include_issues: boolean;
|
include_issues: boolean;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user