def _rename_github_config_key(old_key: str, new_key: str, action: str) -> int:
    """Rename one key in the stored config of every GitHub connector.

    Reads ``id`` and ``connector_specific_config`` for all rows of the
    ``connector`` table whose ``source`` is ``'GITHUB'``. For each row whose
    config contains ``old_key``, the value is moved unchanged to ``new_key``
    and the config is written back as a JSON string.

    Args:
        old_key: Config key to remove.
        new_key: Config key that receives the value previously under
            ``old_key``. An existing ``new_key`` entry is overwritten.
        action: Verb used in the per-connector error log message
            (e.g. ``"updating"`` or ``"reverting"``).

    Returns:
        The number of connector rows that were rewritten.
    """
    conn = op.get_bind()

    github_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'GITHUB'
            """
        )
    ).fetchall()

    logger.debug(f"Found {len(github_connectors)} GitHub connectors to inspect")

    # Built once; only the bound parameters differ between rows.
    update_stmt = sa.text(
        """
        UPDATE connector
        SET connector_specific_config = :new_config
        WHERE id = :connector_id
        """
    )

    changed_count = 0
    for connector_id, config in github_connectors:
        try:
            if not config:
                logger.warning(f"Connector {connector_id} has no config, skipping")
                continue

            # The driver may hand the column back as raw JSON text.
            if isinstance(config, str):
                config = json.loads(config)

            if old_key not in config:
                continue

            # Work on a copy so the fetched row value is never mutated.
            new_config = dict(config)
            new_config[new_key] = new_config.pop(old_key)

            conn.execute(
                update_stmt,
                {"connector_id": connector_id, "new_config": json.dumps(new_config)},
            )
            changed_count += 1
        except Exception as e:
            # Best-effort: one malformed config must not stop the remaining rows.
            # NOTE(review): if the failure came from the UPDATE itself, the
            # surrounding transaction may already be aborted on some databases,
            # in which case later statements will fail too - confirm that
            # swallowing DB errors here is intended.
            logger.error(f"Error {action} connector {connector_id}: {str(e)}")

    return changed_count


def upgrade() -> None:
    """Rename ``repo_name`` to ``repositories`` in GitHub connector configs."""
    updated_count = _rename_github_config_key("repo_name", "repositories", "updating")
    logger.info(
        f"Updated {updated_count} GitHub connectors from repo_name to repositories"
    )


def downgrade() -> None:
    """Rename ``repositories`` back to ``repo_name`` in GitHub connector configs.

    The value is copied verbatim, so a comma-separated ``repositories`` value
    ends up as a single ``repo_name`` string.
    """
    logger.debug(
        "Starting rollback of GitHub connectors from repositories to repo_name"
    )
    reverted_count = _rename_github_config_key("repositories", "repo_name", "reverting")
    logger.info(
        f"Reverted {reverted_count} GitHub connectors from repositories to repo_name"
    )
def _get_github_repos(
    self, github_client: Github, attempt_num: int = 0
) -> list[Repository.Repository]:
    """Fetch the repositories named in the comma-separated ``self.repositories``.

    Each non-empty, whitespace-trimmed name is looked up as
    ``{self.repo_owner}/{name}``. A repository that cannot be fetched is
    logged and skipped. Hitting the rate limit is different: the method
    sleeps via ``_sleep_after_rate_limit_exception`` and retries the whole
    list.

    Args:
        github_client: Client used to look up the repositories.
        attempt_num: Number of rate-limit retries already performed.

    Returns:
        The repositories that could be fetched; empty if none could, or if
        ``self.repositories`` is unset or contains no names.

    Raises:
        RuntimeError: If ``attempt_num`` exceeds
            ``_MAX_NUM_RATE_LIMIT_RETRIES``.
    """
    if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
        raise RuntimeError(
            "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
        )

    # Split on commas, trim whitespace, and drop empty entries ("a,,b", "a,").
    repo_names = [
        stripped
        for stripped in (name.strip() for name in (self.repositories or "").split(","))
        if stripped
    ]

    repos: list[Repository.Repository] = []
    try:
        for repo_name in repo_names:
            try:
                repos.append(github_client.get_repo(f"{self.repo_owner}/{repo_name}"))
            except RateLimitExceededException:
                # RateLimitExceededException derives from GithubException, so it
                # must be handled first; otherwise the generic handler below
                # would swallow it and silently drop the repo instead of retrying.
                raise
            except GithubException as e:
                logger.warning(
                    f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
                )

        return repos
    except RateLimitExceededException:
        _sleep_after_rate_limit_exception(github_client)
        return self._get_github_repos(github_client, attempt_num + 1)
[self._get_github_repo(self.github_client)] + else: + # All repositories + repos = self._get_all_repos(self.github_client) for repo in repos: if self.include_prs: @@ -268,11 +305,48 @@ class GithubConnector(LoadConnector, PollConnector): ) try: - if self.repo_name: - test_repo = self.github_client.get_repo( - f"{self.repo_owner}/{self.repo_name}" - ) - test_repo.get_contents("") + if self.repositories: + if "," in self.repositories: + # Multiple repositories specified + repo_names = [name.strip() for name in self.repositories.split(",")] + if not repo_names: + raise ConnectorValidationError( + "Invalid connector settings: No valid repository names provided." + ) + + # Validate at least one repository exists and is accessible + valid_repos = False + validation_errors = [] + + for repo_name in repo_names: + if not repo_name: + continue + + try: + test_repo = self.github_client.get_repo( + f"{self.repo_owner}/{repo_name}" + ) + test_repo.get_contents("") + valid_repos = True + # If at least one repo is valid, we can proceed + break + except GithubException as e: + validation_errors.append( + f"Repository '{repo_name}': {e.data.get('message', str(e))}" + ) + + if not valid_repos: + error_msg = ( + "None of the specified repositories could be accessed: " + ) + error_msg += ", ".join(validation_errors) + raise ConnectorValidationError(error_msg) + else: + # Single repository (backward compatibility) + test_repo = self.github_client.get_repo( + f"{self.repo_owner}/{self.repositories}" + ) + test_repo.get_contents("") else: # Try to get organization first try: @@ -298,10 +372,15 @@ class GithubConnector(LoadConnector, PollConnector): "Your GitHub token does not have sufficient permissions for this repository (HTTP 403)." 
) elif e.status == 404: - if self.repo_name: - raise ConnectorValidationError( - f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}" - ) + if self.repositories: + if "," in self.repositories: + raise ConnectorValidationError( + f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}" + ) + else: + raise ConnectorValidationError( + f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}" + ) else: raise ConnectorValidationError( f"GitHub user or organization not found: {self.repo_owner}" @@ -310,6 +389,7 @@ class GithubConnector(LoadConnector, PollConnector): raise ConnectorValidationError( f"Unexpected GitHub error (status={e.status}): {e.data}" ) + except Exception as exc: raise Exception( f"Unexpected error during GitHub settings validation: {exc}" @@ -321,7 +401,7 @@ if __name__ == "__main__": connector = GithubConnector( repo_owner=os.environ["REPO_OWNER"], - repo_name=os.environ["REPO_NAME"], + repositories=os.environ["REPOSITORIES"], ) connector.load_credentials( {"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]} diff --git a/backend/tests/daily/connectors/confluence/test_confluence_basic.py b/backend/tests/daily/connectors/confluence/test_confluence_basic.py index 7cc80fb2f..4da3e7e53 100644 --- a/backend/tests/daily/connectors/confluence/test_confluence_basic.py +++ b/backend/tests/daily/connectors/confluence/test_confluence_basic.py @@ -45,7 +45,7 @@ def test_confluence_connector_basic( with pytest.raises(StopIteration): next(doc_batch_generator) - assert len(doc_batch) == 3 + assert len(doc_batch) == 2 page_within_a_page_doc: Document | None = None page_doc: Document | None = None diff --git a/web/src/components/admin/connectors/ConnectorTitle.tsx b/web/src/components/admin/connectors/ConnectorTitle.tsx index 525f7f226..babd130f1 100644 --- a/web/src/components/admin/connectors/ConnectorTitle.tsx +++ b/web/src/components/admin/connectors/ConnectorTitle.tsx @@ -40,8 
/**
 * Connector-specific settings stored for a GitHub connector.
 */
export interface GithubConfig {
  // Owner that the listed repositories are looked up under.
  repo_owner: string;
  repositories: string; // Comma-separated list of repository names
  // NOTE(review): presumably toggles indexing of pull requests — confirm against the backend connector.
  include_prs: boolean;
  // NOTE(review): presumably toggles indexing of issues — confirm against the backend connector.
  include_issues: boolean;
}