Add option to index all Jira projects (#4106)

* Add option to index all Jira projects

* Fix test

* Fix web build

* Address comment
This commit is contained in:
Chris Weaver
2025-02-24 18:07:00 -08:00
committed by GitHub
parent 0dce67094e
commit a99dd05533
4 changed files with 232 additions and 44 deletions

View File

@@ -0,0 +1,120 @@
"""migrate jira connectors to new format
Revision ID: da42808081e3
Revises: f13db29f3101
Create Date: 2025-02-24 11:24:54.396040
"""
from alembic import op
import sqlalchemy as sa
import json
from onyx.configs.constants import DocumentSource
from onyx.connectors.onyx_jira.utils import extract_jira_project
# revision identifiers, used by Alembic.
revision = "da42808081e3"
down_revision = "f13db29f3101"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Convert every Jira connector config from the URL format to the split format.

    Old format: ``{"jira_project_url": "<URL of any page inside the project>", ...}``
    New format: ``{"jira_base_url": "<instance URL>", "project_key": "<key or null>", ...}``

    All keys other than ``jira_project_url`` (for example
    ``comment_email_blacklist``) are carried over unchanged, so the migration
    does not silently discard connector settings.

    Connectors with an empty config are skipped. Connectors without a
    ``jira_project_url`` are left untouched and reported with a warning, since
    they need manual review.
    """
    conn = op.get_bind()

    # NOTE(review): the upper-cased enum value is used for the lookup —
    # presumably the `source` column stores the enum member name; confirm.
    jira_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = :source
            """
        ),
        {"source": DocumentSource.JIRA.value.upper()},
    ).fetchall()

    for connector_id, old_config in jira_connectors:
        if not old_config:
            continue

        project_url = old_config.get("jira_project_url")
        if not project_url:
            # Nothing to derive the base URL from; leave the row as-is and
            # flag it so an admin can fix it up by hand.
            print(
                f"WARNING: Jira connector {connector_id} has no project URL configured"
            )
            continue

        try:
            jira_base, project_key = extract_jira_project(project_url)
        except ValueError:
            # The URL could not be parsed: fall back to whatever precedes
            # "/projects/" as the base URL and index without a project filter.
            jira_base = project_url.split("/projects/")[0]
            project_key = None

        # Start from the existing settings so unrelated keys survive, then
        # swap the single URL key for the two new ones.
        new_config: dict[str, object] = {
            key: value
            for key, value in old_config.items()
            if key != "jira_project_url"
        }
        new_config["jira_base_url"] = jira_base
        new_config["project_key"] = project_key

        conn.execute(
            sa.text(
                """
                UPDATE connector
                SET connector_specific_config = :new_config
                WHERE id = :id
                """
            ),
            # Bound as a JSON string; a raw dict is not a valid bind value here.
            {"id": connector_id, "new_config": json.dumps(new_config)},
        )
def downgrade() -> None:
    """Convert every Jira connector config back to the single-URL format.

    ``{"jira_base_url": B, "project_key": K}`` becomes
    ``{"jira_project_url": "B/projects/K"}``; when no project key is stored the
    base URL alone is written as ``jira_project_url``. Connectors with an empty
    config or without a ``jira_base_url`` are left untouched.

    NOTE(review): only ``jira_project_url`` is written back, so any other keys
    in the stored config are discarded by the downgrade — confirm that this is
    intended.
    """
    conn = op.get_bind()

    jira_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = :source
            """
        ),
        {"source": DocumentSource.JIRA.value.upper()},
    ).fetchall()

    for connector_id, new_config in jira_connectors:
        if not new_config:
            continue

        base_url = new_config.get("jira_base_url")
        if not base_url:
            # Not in the new format (or already downgraded); nothing to do.
            continue

        project_key = new_config.get("project_key")
        project_url = f"{base_url}/projects/{project_key}" if project_key else base_url
        old_config = {"jira_project_url": project_url}

        conn.execute(
            sa.text(
                """
                UPDATE connector
                SET connector_specific_config = :old_config
                WHERE id = :id
                """
            ),
            # Serialise to JSON exactly as upgrade() does: a plain dict cannot
            # be used as a bind parameter for a textual SQL statement.
            {"id": connector_id, "old_config": json.dumps(old_config)},
        )

View File

@@ -29,7 +29,6 @@ from onyx.connectors.onyx_jira.utils import best_effort_basic_expert_info
from onyx.connectors.onyx_jira.utils import best_effort_get_field_from_issue
from onyx.connectors.onyx_jira.utils import build_jira_client
from onyx.connectors.onyx_jira.utils import build_jira_url
from onyx.connectors.onyx_jira.utils import extract_jira_project
from onyx.connectors.onyx_jira.utils import extract_text_from_adf
from onyx.connectors.onyx_jira.utils import get_comment_strs
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
@@ -160,7 +159,8 @@ def fetch_jira_issues_batch(
class JiraConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(
self,
jira_project_url: str,
jira_base_url: str,
project_key: str | None = None,
comment_email_blacklist: list[str] | None = None,
batch_size: int = INDEX_BATCH_SIZE,
# if a ticket has one of the labels specified in this list, we will just
@@ -169,12 +169,13 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
labels_to_skip: list[str] = JIRA_CONNECTOR_LABELS_TO_SKIP,
) -> None:
self.batch_size = batch_size
self.jira_base, self._jira_project = extract_jira_project(jira_project_url)
self._jira_client: JIRA | None = None
self.jira_base = jira_base_url.rstrip("/") # Remove trailing slash if present
self.jira_project = project_key
self._comment_email_blacklist = comment_email_blacklist or []
self.labels_to_skip = set(labels_to_skip)
self._jira_client: JIRA | None = None
@property
def comment_email_blacklist(self) -> tuple:
return tuple(email.strip() for email in self._comment_email_blacklist)
@@ -188,7 +189,9 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
@property
def quoted_jira_project(self) -> str:
# Quote the project name to handle reserved words
return f'"{self._jira_project}"'
if not self.jira_project:
return ""
return f'"{self.jira_project}"'
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
self._jira_client = build_jira_client(
@@ -197,8 +200,14 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
)
return None
def _get_jql_query(self) -> str:
"""Get the JQL query based on whether a specific project is set"""
if self.jira_project:
return f"project = {self.quoted_jira_project}"
return "" # Empty string means all accessible projects
def load_from_state(self) -> GenerateDocumentsOutput:
jql = f"project = {self.quoted_jira_project}"
jql = self._get_jql_query()
document_batch = []
for doc in fetch_jira_issues_batch(
@@ -225,11 +234,10 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
"%Y-%m-%d %H:%M"
)
base_jql = self._get_jql_query()
jql = (
f"project = {self.quoted_jira_project} AND "
f"updated >= '{start_date_str}' AND "
f"updated <= '{end_date_str}'"
)
f"{base_jql} AND " if base_jql else ""
) + f"updated >= '{start_date_str}' AND updated <= '{end_date_str}'"
document_batch = []
for doc in fetch_jira_issues_batch(
@@ -252,7 +260,7 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
jql = f"project = {self.quoted_jira_project}"
jql = self._get_jql_query()
slim_doc_batch = []
for issue in _paginate_jql_search(
@@ -279,43 +287,63 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
if self._jira_client is None:
raise ConnectorMissingCredentialError("Jira")
if not self._jira_project:
raise ConnectorValidationError(
"Invalid connector settings: 'jira_project' must be provided."
)
# If a specific project is set, validate it exists
if self.jira_project:
try:
self.jira_client.project(self.jira_project)
except Exception as e:
status_code = getattr(e, "status_code", None)
try:
self.jira_client.project(self._jira_project)
if status_code == 401:
raise CredentialExpiredError(
"Jira credential appears to be expired or invalid (HTTP 401)."
)
elif status_code == 403:
raise InsufficientPermissionsError(
"Your Jira token does not have sufficient permissions for this project (HTTP 403)."
)
elif status_code == 404:
raise ConnectorValidationError(
f"Jira project not found with key: {self.jira_project}"
)
elif status_code == 429:
raise ConnectorValidationError(
"Validation failed due to Jira rate-limits being exceeded. Please try again later."
)
except Exception as e:
status_code = getattr(e, "status_code", None)
raise RuntimeError(f"Unexpected Jira error during validation: {e}")
else:
# If no project specified, validate we can access the Jira API
try:
# Try to list projects to validate access
self.jira_client.projects()
except Exception as e:
status_code = getattr(e, "status_code", None)
if status_code == 401:
raise CredentialExpiredError(
"Jira credential appears to be expired or invalid (HTTP 401)."
)
elif status_code == 403:
raise InsufficientPermissionsError(
"Your Jira token does not have sufficient permissions to list projects (HTTP 403)."
)
elif status_code == 429:
raise ConnectorValidationError(
"Validation failed due to Jira rate-limits being exceeded. Please try again later."
)
if status_code == 401:
raise CredentialExpiredError(
"Jira credential appears to be expired or invalid (HTTP 401)."
)
elif status_code == 403:
raise InsufficientPermissionsError(
"Your Jira token does not have sufficient permissions for this project (HTTP 403)."
)
elif status_code == 404:
raise ConnectorValidationError(
f"Jira project not found with key: {self._jira_project}"
)
elif status_code == 429:
raise ConnectorValidationError(
"Validation failed due to Jira rate-limits being exceeded. Please try again later."
)
else:
raise Exception(f"Unexpected Jira error during validation: {e}")
raise RuntimeError(f"Unexpected Jira error during validation: {e}")
if __name__ == "__main__":
import os
connector = JiraConnector(
os.environ["JIRA_PROJECT_URL"], comment_email_blacklist=[]
jira_base_url=os.environ["JIRA_BASE_URL"],
project_key=os.environ.get("JIRA_PROJECT_KEY"),
comment_email_blacklist=[],
)
connector.load_credentials(
{
"jira_user_email": os.environ["JIRA_USER_EMAIL"],

View File

@@ -10,7 +10,8 @@ from onyx.connectors.onyx_jira.connector import JiraConnector
@pytest.fixture
def jira_connector() -> JiraConnector:
connector = JiraConnector(
"https://danswerai.atlassian.net/jira/software/c/projects/AS/boards/6",
jira_base_url="https://danswerai.atlassian.net",
project_key="AS",
comment_email_blacklist=[],
)
connector.load_credentials(

View File

@@ -462,14 +462,52 @@ export const connectorConfigs: Record<
},
jira: {
description: "Configure Jira connector",
subtext: `Specify any link to a Jira page below and click "Index" to Index. Based on the provided link, we will index the ENTIRE PROJECT, not just the specified page. For example, entering https://onyx.atlassian.net/jira/software/projects/DAN/boards/1 and clicking the Index button will index the whole DAN Jira project.`,
subtext: `Configure which Jira content to index. You can index everything or specify a particular project.`,
values: [
{
type: "text",
query: "Enter the Jira project URL:",
label: "Jira Project URL",
name: "jira_project_url",
query: "Enter the Jira base URL:",
label: "Jira Base URL",
name: "jira_base_url",
optional: false,
description:
"The base URL of your Jira instance (e.g., https://your-domain.atlassian.net)",
},
{
type: "tab",
name: "indexing_scope",
label: "How Should We Index Your Jira?",
optional: true,
tabs: [
{
value: "everything",
label: "Everything",
fields: [
{
type: "string_tab",
label: "Everything",
name: "everything",
description:
"This connector will index all issues the provided credentials have access to!",
},
],
},
{
value: "project",
label: "Project",
fields: [
{
type: "text",
query: "Enter the project key:",
label: "Project Key",
name: "project_key",
description:
"The key of a specific project to index (e.g., 'PROJ').",
},
],
},
],
defaultTab: "everything",
},
{
type: "list",
@@ -1309,6 +1347,7 @@ export interface ConfluenceConfig {
// Connector-specific configuration for the Jira connector.
// NOTE(review): the Jira entry in `connectorConfigs` names its URL field
// `jira_base_url`, but this interface still declares `jira_project_url` —
// confirm whether the field below should be renamed to match.
export interface JiraConfig {
jira_project_url: string;
// Optional key of a single project to index.
project_key?: string;
comment_email_blacklist?: string[];
}