From a99dd05533509224c480ab31c59ad23dfe73b7fa Mon Sep 17 00:00:00 2001
From: Chris Weaver <25087905+Weves@users.noreply.github.com>
Date: Mon, 24 Feb 2025 18:07:00 -0800
Subject: [PATCH] Add option to index all Jira projects (#4106)

* Add option to index all Jira projects

* Fix test

* Fix web build

* Address comment
---
Reviewer note: the migration now carries over connector settings unrelated to
the URL split (e.g. comment_email_blacklist) in both directions, and
downgrade() JSON-encodes the config the same way upgrade() does. The blob hash
on the migration's "index" line was not regenerated for the revised content.

 ...3_migrate_jira_connectors_to_new_format.py | 141 ++++++++++++++++++
 .../onyx/connectors/onyx_jira/connector.py    | 106 ++++++---
 .../daily/connectors/jira/test_jira_basic.py  |   3 +-
 web/src/lib/connectors/connectors.tsx         |  47 ++++-
 4 files changed, 253 insertions(+), 44 deletions(-)
 create mode 100644 backend/alembic/versions/da42808081e3_migrate_jira_connectors_to_new_format.py

diff --git a/backend/alembic/versions/da42808081e3_migrate_jira_connectors_to_new_format.py b/backend/alembic/versions/da42808081e3_migrate_jira_connectors_to_new_format.py
new file mode 100644
index 000000000000..44931b90ed0d
--- /dev/null
+++ b/backend/alembic/versions/da42808081e3_migrate_jira_connectors_to_new_format.py
@@ -0,0 +1,141 @@
+"""migrate jira connectors to new format
+
+Revision ID: da42808081e3
+Revises: f13db29f3101
+Create Date: 2025-02-24 11:24:54.396040
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import json
+
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.onyx_jira.utils import extract_jira_project
+
+
+# revision identifiers, used by Alembic.
+revision = "da42808081e3"
+down_revision = "f13db29f3101"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Split `jira_project_url` into `jira_base_url` and `project_key`.
+
+    Every other setting stored on the connector (for example
+    `comment_email_blacklist`) is carried over unchanged. Connectors without
+    a `jira_project_url` are left untouched and reported with a warning.
+    """
+    conn = op.get_bind()
+
+    # First get all Jira connectors
+    jira_connectors = conn.execute(
+        sa.text(
+            """
+            SELECT id, connector_specific_config
+            FROM connector
+            WHERE source = :source
+            """
+        ),
+        {"source": DocumentSource.JIRA.value.upper()},
+    ).fetchall()
+
+    # Update each connector's config
+    for connector_id, old_config in jira_connectors:
+        if not old_config:
+            continue
+
+        # Keep every setting that is not part of the URL split so that the
+        # migration does not silently drop it
+        new_config = {
+            key: value
+            for key, value in old_config.items()
+            if key != "jira_project_url"
+        }
+
+        # Extract project key from URL if it exists
+        if project_url := old_config.get("jira_project_url"):
+            # Parse the URL to get base and project
+            try:
+                jira_base, project_key = extract_jira_project(project_url)
+                new_config["jira_base_url"] = jira_base
+                new_config["project_key"] = project_key
+            except ValueError:
+                # If URL parsing fails, just use the URL as the base
+                new_config["jira_base_url"] = project_url.split("/projects/")[0]
+                new_config["project_key"] = None
+        else:
+            # For connectors without a project URL, we need admin intervention
+            # Mark these for review
+            print(
+                f"WARNING: Jira connector {connector_id} has no project URL configured"
+            )
+            continue
+
+        # Update the connector config
+        conn.execute(
+            sa.text(
+                """
+                UPDATE connector
+                SET connector_specific_config = :new_config
+                WHERE id = :id
+                """
+            ),
+            {"id": connector_id, "new_config": json.dumps(new_config)},
+        )
+
+
+def downgrade() -> None:
+    """Rebuild `jira_project_url` from `jira_base_url` and `project_key`.
+
+    Every other setting stored on the connector is carried over unchanged.
+    Connectors without a `jira_base_url` are left untouched.
+    """
+    conn = op.get_bind()
+
+    # First get all Jira connectors
+    jira_connectors = conn.execute(
+        sa.text(
+            """
+            SELECT id, connector_specific_config
+            FROM connector
+            WHERE source = :source
+            """
+        ),
+        {"source": DocumentSource.JIRA.value.upper()},
+    ).fetchall()
+
+    # Update each connector's config back to the old format
+    for connector_id, new_config in jira_connectors:
+        if not new_config:
+            continue
+
+        # Keep every setting that is not part of the URL split
+        old_config = {
+            key: value
+            for key, value in new_config.items()
+            if key not in ("jira_base_url", "project_key")
+        }
+        base_url = new_config.get("jira_base_url")
+        project_key = new_config.get("project_key")
+
+        if base_url and project_key:
+            old_config["jira_project_url"] = f"{base_url}/projects/{project_key}"
+        elif base_url:
+            old_config["jira_project_url"] = base_url
+        else:
+            continue
+
+        # Update the connector config
+        conn.execute(
+            sa.text(
+                """
+                UPDATE connector
+                SET connector_specific_config = :old_config
+                WHERE id = :id
+                """
+            ),
+            # Serialize to JSON text, matching what upgrade() binds
+            {"id": connector_id, "old_config": json.dumps(old_config)},
+        )
diff --git a/backend/onyx/connectors/onyx_jira/connector.py b/backend/onyx/connectors/onyx_jira/connector.py
index 89eec0b165c9..1c784b5aa6d7 100644
--- a/backend/onyx/connectors/onyx_jira/connector.py
+++ b/backend/onyx/connectors/onyx_jira/connector.py
@@ -29,7 +29,6 @@ from onyx.connectors.onyx_jira.utils import best_effort_basic_expert_info
 from onyx.connectors.onyx_jira.utils import best_effort_get_field_from_issue
 from onyx.connectors.onyx_jira.utils import build_jira_client
 from onyx.connectors.onyx_jira.utils import build_jira_url
-from onyx.connectors.onyx_jira.utils import extract_jira_project
 from onyx.connectors.onyx_jira.utils import extract_text_from_adf
 from onyx.connectors.onyx_jira.utils import get_comment_strs
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
@@ -160,7 +159,8 @@ def fetch_jira_issues_batch(
 class JiraConnector(LoadConnector, PollConnector, SlimConnector):
     def __init__(
         self,
-        jira_project_url: str,
+        jira_base_url: str,
+        project_key: str | None = None,
         comment_email_blacklist: list[str] | None = None,
         batch_size: int = INDEX_BATCH_SIZE,
         # if a ticket has one of the labels specified in this list, we will just
@@ -169,12 +169,13 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
         labels_to_skip: list[str] = JIRA_CONNECTOR_LABELS_TO_SKIP,
     ) -> None:
         self.batch_size = batch_size
-        self.jira_base, self._jira_project = extract_jira_project(jira_project_url)
-        self._jira_client: JIRA | None = None
+        self.jira_base = jira_base_url.rstrip("/")  # Remove trailing slash if present
+        self.jira_project = project_key
         self._comment_email_blacklist = comment_email_blacklist or []
-
         self.labels_to_skip = set(labels_to_skip)
 
+        self._jira_client: JIRA | None = None
+
     @property
     def comment_email_blacklist(self) -> tuple:
         return tuple(email.strip() for email in self._comment_email_blacklist)
@@ -188,7 +189,9 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
     @property
     def quoted_jira_project(self) -> str:
         # Quote the project name to handle reserved words
-        return f'"{self._jira_project}"'
+        if not self.jira_project:
+            return ""
+        return f'"{self.jira_project}"'
 
     def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
         self._jira_client = build_jira_client(
@@ -197,8 +200,14 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
         )
         return None
 
+    def _get_jql_query(self) -> str:
+        """Get the JQL query based on whether a specific project is set"""
+        if self.jira_project:
+            return f"project = {self.quoted_jira_project}"
+        return ""  # Empty string means all accessible projects
+
     def load_from_state(self) -> GenerateDocumentsOutput:
-        jql = f"project = {self.quoted_jira_project}"
+        jql = self._get_jql_query()
 
         document_batch = []
         for doc in fetch_jira_issues_batch(
@@ -225,11 +234,10 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
             "%Y-%m-%d %H:%M"
         )
 
+        base_jql = self._get_jql_query()
         jql = (
-            f"project = {self.quoted_jira_project} AND "
-            f"updated >= '{start_date_str}' AND "
-            f"updated <= '{end_date_str}'"
-        )
+            f"{base_jql} AND " if base_jql else ""
+        ) + f"updated >= '{start_date_str}' AND updated <= '{end_date_str}'"
 
         document_batch = []
         for doc in fetch_jira_issues_batch(
@@ -252,7 +260,7 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
         end: SecondsSinceUnixEpoch | None = None,
         callback: IndexingHeartbeatInterface | None = None,
     ) -> GenerateSlimDocumentOutput:
-        jql = f"project = {self.quoted_jira_project}"
+        jql = self._get_jql_query()
 
         slim_doc_batch = []
         for issue in _paginate_jql_search(
@@ -279,43 +287,63 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
         if self._jira_client is None:
             raise ConnectorMissingCredentialError("Jira")
 
-        if not self._jira_project:
-            raise ConnectorValidationError(
-                "Invalid connector settings: 'jira_project' must be provided."
-            )
+        # If a specific project is set, validate it exists
+        if self.jira_project:
+            try:
+                self.jira_client.project(self.jira_project)
+            except Exception as e:
+                status_code = getattr(e, "status_code", None)
 
-        try:
-            self.jira_client.project(self._jira_project)
+                if status_code == 401:
+                    raise CredentialExpiredError(
+                        "Jira credential appears to be expired or invalid (HTTP 401)."
+                    )
+                elif status_code == 403:
+                    raise InsufficientPermissionsError(
+                        "Your Jira token does not have sufficient permissions for this project (HTTP 403)."
+                    )
+                elif status_code == 404:
+                    raise ConnectorValidationError(
+                        f"Jira project not found with key: {self.jira_project}"
+                    )
+                elif status_code == 429:
+                    raise ConnectorValidationError(
+                        "Validation failed due to Jira rate-limits being exceeded. Please try again later."
+                    )
 
-        except Exception as e:
-            status_code = getattr(e, "status_code", None)
+                raise RuntimeError(f"Unexpected Jira error during validation: {e}")
+        else:
+            # If no project specified, validate we can access the Jira API
+            try:
+                # Try to list projects to validate access
+                self.jira_client.projects()
+            except Exception as e:
+                status_code = getattr(e, "status_code", None)
+                if status_code == 401:
+                    raise CredentialExpiredError(
+                        "Jira credential appears to be expired or invalid (HTTP 401)."
+                    )
+                elif status_code == 403:
+                    raise InsufficientPermissionsError(
+                        "Your Jira token does not have sufficient permissions to list projects (HTTP 403)."
+                    )
+                elif status_code == 429:
+                    raise ConnectorValidationError(
+                        "Validation failed due to Jira rate-limits being exceeded. Please try again later."
+                    )
 
-            if status_code == 401:
-                raise CredentialExpiredError(
-                    "Jira credential appears to be expired or invalid (HTTP 401)."
-                )
-            elif status_code == 403:
-                raise InsufficientPermissionsError(
-                    "Your Jira token does not have sufficient permissions for this project (HTTP 403)."
-                )
-            elif status_code == 404:
-                raise ConnectorValidationError(
-                    f"Jira project not found with key: {self._jira_project}"
-                )
-            elif status_code == 429:
-                raise ConnectorValidationError(
-                    "Validation failed due to Jira rate-limits being exceeded. Please try again later."
-                )
-            else:
-                raise Exception(f"Unexpected Jira error during validation: {e}")
+                raise RuntimeError(f"Unexpected Jira error during validation: {e}")
 
 
 if __name__ == "__main__":
     import os
 
     connector = JiraConnector(
-        os.environ["JIRA_PROJECT_URL"], comment_email_blacklist=[]
+        jira_base_url=os.environ["JIRA_BASE_URL"],
+        project_key=os.environ.get("JIRA_PROJECT_KEY"),
+        comment_email_blacklist=[],
     )
+
     connector.load_credentials(
         {
             "jira_user_email": os.environ["JIRA_USER_EMAIL"],
diff --git a/backend/tests/daily/connectors/jira/test_jira_basic.py b/backend/tests/daily/connectors/jira/test_jira_basic.py
index e53f30f29168..cf7d14fbd233 100644
--- a/backend/tests/daily/connectors/jira/test_jira_basic.py
+++ b/backend/tests/daily/connectors/jira/test_jira_basic.py
@@ -10,7 +10,8 @@ from onyx.connectors.onyx_jira.connector import JiraConnector
 @pytest.fixture
 def jira_connector() -> JiraConnector:
     connector = JiraConnector(
-        "https://danswerai.atlassian.net/jira/software/c/projects/AS/boards/6",
+        jira_base_url="https://danswerai.atlassian.net",
+        project_key="AS",
         comment_email_blacklist=[],
     )
     connector.load_credentials(
diff --git a/web/src/lib/connectors/connectors.tsx b/web/src/lib/connectors/connectors.tsx
index cd5b17fed68d..da4dd54a3ac0 100644
--- a/web/src/lib/connectors/connectors.tsx
+++ b/web/src/lib/connectors/connectors.tsx
@@ -462,14 +462,52 @@ export const connectorConfigs: Record<
   },
   jira: {
     description: "Configure Jira connector",
-    subtext: `Specify any link to a Jira page below and click "Index" to Index. Based on the provided link, we will index the ENTIRE PROJECT, not just the specified page. For example, entering https://onyx.atlassian.net/jira/software/projects/DAN/boards/1 and clicking the Index button will index the whole DAN Jira project.`,
+    subtext: `Configure which Jira content to index. You can index everything or specify a particular project.`,
     values: [
       {
         type: "text",
-        query: "Enter the Jira project URL:",
-        label: "Jira Project URL",
-        name: "jira_project_url",
+        query: "Enter the Jira base URL:",
+        label: "Jira Base URL",
+        name: "jira_base_url",
         optional: false,
+        description:
+          "The base URL of your Jira instance (e.g., https://your-domain.atlassian.net)",
+      },
+      {
+        type: "tab",
+        name: "indexing_scope",
+        label: "How Should We Index Your Jira?",
+        optional: true,
+        tabs: [
+          {
+            value: "everything",
+            label: "Everything",
+            fields: [
+              {
+                type: "string_tab",
+                label: "Everything",
+                name: "everything",
+                description:
+                  "This connector will index all issues the provided credentials have access to!",
+              },
+            ],
+          },
+          {
+            value: "project",
+            label: "Project",
+            fields: [
+              {
+                type: "text",
+                query: "Enter the project key:",
+                label: "Project Key",
+                name: "project_key",
+                description:
+                  "The key of a specific project to index (e.g., 'PROJ').",
+              },
+            ],
+          },
+        ],
+        defaultTab: "everything",
       },
       {
         type: "list",
@@ -1309,6 +1347,7 @@ export interface ConfluenceConfig {
 
 export interface JiraConfig {
   jira_project_url: string;
+  project_key?: string;
   comment_email_blacklist?: string[];
 }
 